
\documentstyle [12pt,amstex]{article} \def\email#1{\relax}\let\address\email
%\www{http://www.tu-graz.ac.at/rote}
%\documentstyle {amsart}
\input german.sty
%%%%%%\selectlanguage{USenglish}
\textwidth 15,5 true cm
\textheight24 true cm
\hoffset -10mm
\voffset -25mm
\newtheorem{lemma} {Lemma}
\newtheorem{theorem} {Theorem}
\newtheorem{corollary} {Corollary}

\def\proof{\noindent {\em Proof}}
\def\affe{@@}


\begin{document}

\title
%   [Counting binary trees by degree sequence] %\shorttitle
%
%
            {Binary trees having a given number
            of nodes with 0, 1, and 2~children.
%%%%%\\ \em         DRAFT --- PLEASE DISTRIBUTE
}
%
\author               {G\"unter Rote%
%
\thanks{Institut f\"ur Mathematik, Technische Universit\"at Graz,
Steyrergasse 30, A-8010~Graz, Austria.
\hskip0pt plus 20cm\penalty0 \hskip0pt plus -20cm
Electronic mail: {\tt rote\protect\affe opt.math.tu-graz.ac.at}.
\hskip0pt plus 20cm\penalty0 \hskip0pt plus -20cm
World-Wide~Web:~{\tt http://www.tu-graz.ac.at/rote}.
}}


\address{Institut f\"ur Mathematik (501~B)\\Technische Universit\"at Graz\\
Steyrergasse 30\\A-8010~Graz\\Austria}
\email{\tt rote@@opt.math.tu-graz.ac.at}
%\www {\tt http://www.tu-graz.ac.at/rote}

\date{13. August 1997}

\maketitle

%\def\Ipe#1{\def\IPEfile{#1}\input{\IPEfile}}
%\def\Ipe#1{{\def \makebox (0,0)[lb]##1{\vbox {\hbox {##1}\kern 0pt}}%
%            \def\IPEfile{#1}\input{\IPEfile}}}

\def\qedbox{\vrule width 0.61em depth 0.12em height 0.49em}
\def\qed {\ifinner \noalign {\global\dimen1=\prevdepth
\ifdim\prevdepth=-1000pt \global\dimen1=0pt \fi
\vskip-\dimen1 \nointerlineskip}
\omit\span\omit \hfil
\vbox to -\dimen1{\vss
    \hbox to 0pt {\hss \hbox to \hsize{\hfill\qedbox}\hss}}\hfil\crcr
% funktionirt auch in \eqalign (nach dem letzten \cr)
\else \ifmmode \eqno\qedbox
       \else \qquad\hbox{}\nobreak\hfill \nopagebreak \qedbox\par \smallskip
            \fi\fi}


\begin{abstract}
We give three combinatorial proofs for the number of binary trees having a
given number of nodes with 0, 1, and 2~children.
We extend these results to ordered trees with given distribution of nodes
according to their numbers of children.
\end{abstract}

\section{Introduction}

A {\em binary tree} is either empty, or
it consists of a {\em root node\/} and two binary trees, called the left
subtree and the right subtree.
A nonempty binary tree can also be regarded as a rooted directed tree graph
(arborescence) where the outgoing arcs of each node are labeled by distinct
labels from the set $\{ {\mathrm L, \mathrm R}\}$.
(We take the arcs as directed away from the root.)

\begin{theorem} \label{lem-bintrees}
The number of binary trees having
$i$~nodes with $2$~children,
$j$~nodes with $1$~child, and
$k=i+1$ nodes without children, is
$$ 2^j \binom {2i+j}{j} b_i = \binom {n} {i \ j \ k} \frac{2^j}{n},$$
where $b_i = \binom {2i}{i-1}/i$ is the $i$-th Catalan number
and $n=i+j+k$ is the total number of nodes.
\end{theorem}
Note that $k$ must always equal $i+1$ in a binary tree.

Prodinger~\cite{P} recently computed the probability that a random
binary tree with $n$ nodes has
$i$~nodes with 2~children (and hence $i+1$ nodes without children and
$n-2i-1$~nodes with 1~child). Since the total number of binary trees with $n$
nodes is known---it is~$b_n$---, his formulas can be derived easily from the
above theorem and vice versa.
Prodinger proved his results by simplifying sums of expressions involving
binomial coefficients that were derived by Mahmoud~\cite{M}.

This paper contains three independent combinatorial proofs of
Theorem~\ref{lem-bintrees}.
The first proof is very simple; it it based on the fact that nodes with one
child don't really add very much to the combinatorial structure of binary
trees and can be easily eliminated, thus reducing
the problem to the case where $j=0$,
for which the answer is known to be the Catalan numbers.
The second proof relates the trees to be counted to labeled trees with given
vertex degrees; the third proof relates them to integer sequences with certain
properties.
These sequences can be counted very easily by observing that, in each
equivalence class under cyclic rotations, there is precisely one sequence that
has the required properties (Rotation Principle).

The second and third proofs can be generalized to ordered trees with arbitrary
node degrees.


\section {The first proof % of Lemma~\protect\ref{lem-bintrees}}
--- binary trees}

Consider one of the binary trees $B$ which are counted in the theorem.
A vertex with 1~child in a binary tree can be eliminated by ``short-cutting''
it, connecting its child directly with its parent.
To avoid problems when trying to eliminate the root node in this way,
let us attach to $B$ an artificial new root, whose
only child is the original root node, thus getting a {\em planted\/} binary
tree.
Now, by % successively
eliminating all $j$ nodes with one child (except the
artifical root),
we get a unique planted tree $B'$ with the artificial root plus $2i+1$ nodes,
each having
either 0 or 2 children. (Viewed as graphs, $B$ and $B'$ are homeomorphic.)
Each edge of $B'$ corresponds to a chain of one or more edges in $B$.
The number of possible trees $B'$ is~$b_i$.
To reconstruct $B$ from its transformed tree $B'$, we have to distribute
$j$ new nodes over the $2i+1$ edges of~$B'$.
There are $\binom {j+2i}{j}$ ways to do this.
The factor $2^j$ accounts for the fact that each new node can have either a
left child or a right child.

\section {The second proof
--- $d$-ary trees}

The second proof establishes a correspondences with
{\em labeled trees\/} with
$n$ vertices of given degrees $d_1,\ldots,d_n$, whose number is the
multinomial coefficient
\begin{equation}   \label{eq-lab-trees}
\binom {n-2}
{d_1-1 \quad d_2-1 \ \cdots \ d_n-1},
\end{equation}
see for example Moon~\cite{Moon}.

This proof works more generally for $d$-ary trees,
and the resulting theorem generalizes Theorem~\ref{lem-bintrees}.
A nonempty {\em $d$-ary tree\/} is a rooted directed tree
where the (at most~$d$) outgoing arcs of each node are labeled by distinct
labels from the set $\{1,\ldots,d\}$.

\begin{theorem} \label{th-d-ary}
The number of $d$-ary trees with $n$ nodes,
having $n_i$ nodes with $i$ children, for
$i=0,1,\ldots,d$, is
$$ \frac 1n
\cdot
\binom {n}
{n_0 \ n_1 \ \cdots \ n_d}
\cdot
\textstyle
\binom d0 ^{n_0}
\binom d1 ^{n_1}
\binom d2 ^{n_2} \cdots
\binom dd ^{n_d}
,
$$
if $\sum_{i=0}^d n_ii=n-1$.
If this equation does not hold, there are no such trees.
\end{theorem}
\proof.
We augment the $d$-ary tree to a planted tree as in the previous section by
adding an artificial
root, and then we {\em label} the augmented tree $B$ by giving the special
label $0$ to the artificial root, and the labels $(i,1),\ldots,(i,n_i)$ to the
nodes with $i$ children, in arbitrary order. This procedure gives rise to a
{\em
labeled $d$-ary tree}. The number of labeled $d$-ary trees obtained in
this way is $n_0!n_1!\ldots n_d!$ times the number of trees that we want to
determine.
On the other hand,
a labeled $d$-ary tree can be constructed from a rooted labeled tree
$T$ (viewed as a graph), where
$n_0+1$ vertices have degree~1 (including the artificial root),
$n_1$ vertices have degree~2, and
$n_2$ vertices have degree~3, etc.
They have a total of $n+1$ nodes, and by (\ref{eq-lab-trees}),
their number is
$$\frac{(n-1)!} { 0!^{n_0+1} 1!^{n_1} 2!^{n_2}\ldots d!^{n_d} }.$$
For each such tree, we have to select labels for the arcs going from
each node to its children.
This can be done in
$$ 1^{n_0} \cdot
d ^{n_1} \cdot
\bigl(d(d-1)\bigr) ^{n_2} \cdots
(d!) ^{n_d}
=
\frac{d!^n} { d!^{n_0} (d-1)!^{n_1} (d-2)!^{n_2}\ldots 0!^{n_d} }
$$ ways.
%Each tree $B$ can be labeled in $i!j!k!$ different ways. (Remember that each
%label corresponds to a specified degree, and the artificial root is
%distinguished.)
%However, when we swap the two subtrees of a node with one child or two
%children, this is not reflected in the labeled tree, where all edges incident
%with a vertex are considered as equal.
%Therefore, each labeled tree is generated $2^{i+j}$ times.
%
Putting everything together, we get
%
%$$\frac{(n-1)!}{2^i} \cdot \frac 1 {i!j!k!} \cdot 2^{i+j},
%$$ which is the second expression of the theorem.
%This approach will also extend to ternary and higher trees with given degree
%sequences.
%
the theorem.
\qed

\begin{corollary} % {\textrm{\cite{d-ary-trees}}}
The number of $d$-ary trees with $n$ nodes is
$$\binom {dn}{n-1} \frac 1n.$$
\end{corollary}
This result was obtained by many authors independently
in different forms. For example,
Harary, Palmer, and Read~\cite{HPR} considered partitions of a convex polygon
into $(d+1)$-gons without introducing additional vertices.
Beineke and Pippert~\cite{BP1,BP2} considered partitions of a $d$-ball into
$d$-simplices.
All these proofs use generating functions.

\section {The third proof --- ordered trees}

An {\em ordered tree\/} is a rooted tree where
the arcs from each node to its
children are numbered consecutively starting with~1. So the first child is
distinguished from the second child and so on.

\begin{theorem} \label{th-ordered}
The number of ordered trees with $n$ nodes,
having $n_i$ nodes with $i$ children, for
$i=0,1,\ldots$, is
$$ \frac 1n
\binom {n}
{n_0 \ n_1 \ n_2 \ \cdots },$$
if $\sum_{i\ge0} n_i(i-1)=-1$.
If this equation does not hold, there are no such trees.
\end{theorem}
%
This theorem is equivalent to Theorem~\ref{th-d-ary},
since in a $d$-ary tree the arcs from a node to its $i$ children
can be labeled by any subset of $i$ numbers from the set $\{1,\ldots,d\}$,
not just by the labels $\{1,\ldots,i\}$.
Therefore, to get the number of $d$-ary trees,
the number of ordered trees must simply be multiplied
by if $\prod_{i=0}^d \binom di ^{n_i}$
to account for the additional choices of labels.

Tutte~\cite{Tutte} obtained this result by using generating functions,
% Harary in MR32#4044: "elegant and elementary combinatorial methods
%                       using differential calculus"
see also Goulden and Jackson~\cite[2.7.7, pp.~112--113] {GJ}.

However, we give another completely independent and elementary proof.

\proof { \em of Theorem~\ref{th-ordered}.}
We construct a bijection between ordered trees and certain sequences of
integers.
A~{\em preorder traversal\/} of an ordered tree starts at the root, and then
successively traverses the first subtree, the second subtree, etc., in
preorder. If we note the number of children of each node in preorder, we get
a sequence $(c_1,\ldots,c_d)$ containing
$n_i$ elements equal to $i$, for $i=0,1,\ldots$. From this sequence, the
ordered tree can be uniquely reconstructed: We start at the root and generate
its $c_1$ children. Then we go to the first child and add $c_2$ children to
it, and so on. When we come to a node with $c_k=0$ children, we backtrack to
the next node for which the number of children has not been determined, and
generate $c_{k+1}$ children for it.

However, not every sequence corresponds to a tree.
Consider a sequence starting with $2,0,1,0,0,4,0,\ldots$.
After the fourth node has been finished with $c_4=0$ children, there is no
fifth node whose number of children is to be determined as $c_5=0$.
One can characterize this situation by observing the number $t$ of
{\em ``unfinished'' nodes\/} as the construction proceeds.
Initially we have $t_0=1$ unfinished node, the root.
In the $k$-th step we finish one node by determining its number of children,
but we generate $c_k$ new unfinished nodes. Thus,
$t_{k}=t_{k-1}-1+c_k$. The construction can go on as long as there is
at least one unfinished node, i.e., $t_k\ge 1$, except at the last step,
where we have $t_n=0$, by the condition on the numbers $n_i$.
This is summarized in the following lemma, where we have for later convenience
substituted the sequence $c_k$ by $a_k:=c_k-1$ and replaced $t_k$ by
$s_k:=t_k-1$.

\begin{lemma} \label{lem-equiv}
The number of trees in Theorem~\ref{th-ordered} equals the number of sequences
$a_1,\ldots,a_n$ containing $n_i$ elements equal to $i-1$, for $i=0,1,\ldots$,
such that the partial sums $s_k=a_1+\cdots+a_k$ are nonnegative, for
$k=1,\ldots,n-1$.
\qed
\end{lemma}

The total number of sequences of $n$ elements with
$n_i$ elements equal to $i-1$, for $i=0,1,\ldots$,
without regarding the nonnegativity condition, is
$\binom {n}
{n_0 \ n_1 \ n_2 \ \cdots }$.
This is by a factor $n$ more than the number that we want.
Thus, it appears that one out of $n$ sequences should satisfy the
nonnegativity condition of the partial sums.
This is proved by the next lemma, which
I call the Rotation Principle, in analogy to Andr\'e's Reflection
Principle~\cite{Andre}, see~\cite[exercise 5.3.7, pp.~533--534] {GJ},
% Knuth, Fundamental Algorithms, 2nd ed., ex. 2.2.1.4, p.~531
which is another principle that is useful for counting
sequences subject to certain geometric restictions on the paths they generate.
In contrast to this principle, which is applicable to $\pm1$-sequences
(possibly with the inclusion of zeros), the following lemma does not have this
restriction.

\begin
{lemma}
[Rotation Principle]
\label {rotprin}
Consider a sequence $a_1,\ldots,a_n$ of integers with
$a_1+\cdots+a_n=-1$.
\begin{enumerate}
\item \label{1}
All $n$ cyclic rotations
$a_{j+1},a_{j+2},\ldots,a_n,a_1,\ldots,a_{j}$ are distinct.
\item \label{2}
Among the $n$ cyclic rotations there is precisely one whose first $n-1$
partial sums are nonnegative.
\end{enumerate}
\end{lemma}
%
\begin{figure}[ht]
\centering
\unitlength=1,4pt

\begin{picture}(250,70)(-5,-45)
\newdimen\stepsize
\stepsize=10sp % ganzzalig
\newcount\stepnum \stepnum=\stepsize
\newcount\xx
\newcount\yy
\count\xx=0
\count\yy=0
\def\step #1{
  \put(\xx,\yy){\line(1,#1){\stepnum}}
  \advance\xx by \stepsize
  \dimen0=#1\stepsize
  \advance\yy\dimen0
  \put(\xx,\yy){\circle*{2,8}}
}
\thicklines
\linethickness {0,3pt}
\multiput (-5,20)(0,-10){7}{\line(1,0){250}}
\multiput (0,25)(10,0){25}{\line(0,-1){70}}
\thicklines
\put(\xx,\yy){\circle*{4}}
{\linethickness {1pt} \put(\xx,27){\line(0,-1){74}}}
%(2,0,1,0,0,4,0,0,3,1,0,0)
\step 1 \step {-1} \step 0 \step {-1} \step {-1}
\step 3 \step {-1} \step {-1} \step2 \step0 \step {-1} \step {-1}
\put(\xx,\yy){\circle*{4}}
{\linethickness {1pt} \put(\xx,27){\line(0,-1){74}}}
\step 1 \step {-1} \step 0 \step {-1} \step {-1}
\step 3 \step {-1} \step {-1} \step2 \step0 \step {-1} \step {-1}
\put(\xx,\yy){\circle*{4}}
{\linethickness {1pt} \put(\xx,27){\line(0,-1){74}}}
%
\multiput(50,-20)(2.4,-0.2){83}{\circle*{0.9}}
\multiput(50,-20)(-2.4,0.2){25}{\circle*{0.9}}
\thinlines
\put(46,-23){\line(1,0){112}}
\put(158,-23){\line(1,-1){14}}
\end{picture}
\caption {The path corresponding to two cycles of a sequence.}
\label{figrot}
\end{figure}
%
\proof.
Let us draw a polygonal path on a plane grid with a sequence of steps in the
directions $(1,a_i)$, starting at the point $(0,0)$. Figure~\ref{figrot}
shows two consecutive cycles corresponding to the sequence
$(1,-1,0,-1,-1,3,-1,-1,2,0,-1,-1)$.
%(2,0,1,0,0,4,0,0,3,1,0,0)
The sequence goes through the points $(k,s_k)$ and terminates at the point
$(n,-1)$. Each successive cycle is shifted by the vector $(n,-1)$.
Now, the condition $s_k\ge0$ for $1\le k<n$ is equivalent to
$s_k\ge-k/n$ for all~$k$, i.e., all points $(k,s_k)$ must lie above
the line of slope $-1/n$ through the origin.
This condition holds not only for the $n$ points obtained from
the original sequence, but for the infinite set of points obtained by
continuing the given sequence indefinitely in a cyclic fashion.
For a cyclically shifted sequence starting at $a_{j+1}$, the condition is
fulfilled if and only if all points lie above
the line of slope $-1/n$ through the point $(j,s_j)$.
Thus we find the unique starting point by approaching the point set from
below with a line of slope $-1/n$ until it touches the first point.
This line contains only a single point (among the first $n$~points), and
therefore the rotated sequence with nonnegative partial sums is unique.

It we take this sequence as the starting sequence, then
all its $n$ cyclic rotations are distinct, since the amount by
which they were rotated from the starting sequence
can be recovered uniquely from each rotated sequence.
\qed

If the total sum of $-1$ is replaced by some other number,
the first statement of the lemma remains true as long as that number is
relatively prime to~$n$. One may even take arbitrary integral vectors instead
of just the vectors of the form $(1,a_i)$.

The idea to select a good starting point for going through a cyclic sequence
is not new; it appears for example as the solution to an algorithmic problem
of recreational mathematics, see Dewdney~\cite{Dewdney}.
% "auf W\"ustenpatrouille"

With the help of the two previous lemmas, the proof of the theorem can now be
completed easily.
If we regard sequences that can be obtained from each other by cyclic
rotations as equivalent, then,
by Lemma~\ref{rotprin}.\ref{1},
each equivalence class contains $n$ sequences, and
by Lemma~\ref{rotprin}.\ref{2},
each equivalence class contains precisely one sequence which corresponds to a
tree. Therefore the total number of sequences must be divided by~$n$, and we
get the theorem.
\qed

One can also follow the argument of Theorem~\ref{th-d-ary} in the reverse
direction, and one gets then from Theorem~\ref{th-ordered} an
independent proof for the number (\ref {eq-lab-trees}) of labeled trees with
given degrees.

\begin{corollary} {\normalshape {\cite{ordered-trees}}}
The number of ordered trees with $n$ nodes is the $(n-1)$-st Catalan number
$b_{n-1} = \binom {2n-2}{n-1}/n$.
\end{corollary}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{thebibliography}{[HPR]}

% {\makeatletter
% % \def\@lbibitem[#1]#2{\item[\@biblabel{#1}\hfill]\if@filesw
% %       {\def\protect##1{\string ##1\space}\immediate
% %        \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces}
% \global \def\@lbibitem[#1]#2#3\endauthor
% {\item[\ignorespaces #3\unskip\ \@biblabel{#1}\hfill]\ \null\\\if@filesw
%       {\def\protect##1{\string ##1\space}\immediate
%        \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces}
% }

\def\endauthor {\unskip, \newblock}
\def\endauthor {\unskip. \newblock}


\bibitem [A]{Andre}
{\sc D. Andr\'e}
\endauthor
Solution directe du probl\`eme r\'esolu par M.~Bertrand.
\newblock
{\sl C.~R.~Acad. Sci. Paris} {\bf 105} (1887), 436--437.

\bibitem[BP1]{BP1}
{\sc L. W. Beineke and R. R. Pippert}
\endauthor
Enumerating labeled $k$-dimensional trees and ball dissections.
\newblock
in: {\sl Proc. 2nd Chapel Hill Conf. on Combinatorial Mathematics and its
Applications}, Univ. North Carolina, Chapel Hill, 1970, pp.~12--26.

\bibitem[BP2]{BP2}
{\sc L. W. Beineke and R. R. Pippert}
\endauthor
The number of labeled dissections of a $k$-ball.
\newblock
{\sl Math. Ann.} {\bf 191} (1971), 87--98.

\bibitem[D]{Dewdney}
{\sc A. K. Dewdney}
\endauthor
Computer Recreations.
\newblock
{\sl Scientific American} {\bf 256}, 6 (June 1987), 106--109.
Solution in
% {\bf 257}, 4 (October 1987), p.~169, and
{\bf 257}, 5 (November 1987), p.~122.
German translation: Computer-Kurzweil.
\newblock
{\sl Spektrum der Wissenschaft},
September 1987, 6--10;
% January 1988, p.~12;
February 1988, p.~13.

\bibitem[GJ]{GJ}
{\sc I. P. Goulden and D. M. Jackson}
\endauthor
{\sl Combinatorial Enumeration}.
\newblock
Wiley-Interscience, New York 1983.

\bibitem[HPR]{HPR}
{\sc F. Harary, E. M. Palmer, and R. C. Read}
\endauthor
On the cell-growth problem for arbitrary polygons.
\newblock
{\sl Discr. Math.} {\bf 11} (1975), 371--389.

\bibitem[M]{M}
{\sc H. M. Mahmoud}
The joint distribution of the three types of nodes in uniform binary trees.
\newblock
{\sl Algorithmica} {\bf 13} (1995), 313--323.

\bibitem[M]{Moon}
{\sc J. W. Moon}
\endauthor
{\sl Counting Labelled Trees}.
\newblock
Canadian Mathematical Monographs, no.~1.
Canadian Mathematical Congress, London and Beccles, 1970.

\bibitem[P]{P}
{\sc H. Prodinger}
\endauthor
A note on the distribution of the three types of nodes in uniform binary trees.
\newblock
{\sl S\'eminaire Lotharingien de Combinatoire} {\bf 38b}, (1996),
5~pages.
\null\hfil\hbox{\tt
http://cartan.u-strasbg.fr/\char`\~slc/wpapers/s38proding.html}

\bibitem[T]{Tutte}
{\sc W. T. Tutte}
\endauthor
The number of planted plane trees with a given partition.
\newblock
{\sl Amer. Math. Monthly} {\bf 71} (1964), 272--277.

\end{thebibliography}

\end{document}

