% PAPER.TEX
\magnification=\magstep1
\nopagenumbers
\hoffset=-9pt
\voffset=-13pt
\font\twelvebf=cmbx10 scaled\magstep1
\font\eightrm=cmr8
\font\eightbf=cmbx8
\font\sevenrm=cmr7
\let\cl=\centerline
\let\s=\scriptstyle
\def\mean{{\rm I\kern-.18em E\,}}
\def\natural{{\rm I\kern-.18em N}}
\def\integer{{\rm Z\kern-.32em Z}}
\def\real{{\rm I\kern-.18em R}}
\def\tr{{\rm tr}}
\def\half{{1\over 2}}
\def\SS{{\cal S}}
\def\OO{{\cal O}}
\def\oo{{\s\cal O}}
\def\intdpm{\Bigl({N\beta\over 2\pi}\Bigr)^{p/2}\!\!\int\!d^pm\,}
% ----------------------
\def\hamilt{1}
\def\efree{2}
\def\fdelta{3}
\def\ulbound{4}
\def\amunu{5}
\def\abound{6}
\def\pfun{7}
\def\lbound{8}
\def\pizero{9}
\def\cnofp{10}
\def\etran{11}
\def\rAGS{1}
\def\rSTa{2}
\def\rSTb{3}
% ----------------------
\cl{{\twelvebf A Free Energy Bound for the Hopfield Model}}
\vskip.7in
\cl{Hans Koch
\footnote{$^*$}
{{\sevenrm Supported in Part by the
National Science Foundation under Grant No. DMS--9103590.}}}
\cl{Department of Mathematics, University of Texas at Austin}
\cl{Austin, TX 78712}
\vskip.8in\baselineskip=9.5pt
{\narrower\noindent{\eightbf Abstract.\ }\eightrm
We give a simple upper and lower bound on the free energy density
of the Hopfield model of size $\s N$ with $\s p$ stored patterns,
in the limit where $\s N$ and $\s p$ tend to infinity
with $\s p/N\to\alpha<1$.
The two bounds coincide for $\s\alpha=0$.\par}
%-----------------------
\vskip.9in\noindent
\baselineskip=11.5pt plus 1pt minus 1pt
\advance\hsize by 18pt
The Hamiltonian of the Hopfield model with $p$ stored $N$--bit patterns
is a random function on the set $\SS=\{+1,-1\}^N$,
with values given by the equation
$$
H_{p,N,\xi}(S)=-{1\over 2N}\sum_{i,j=1}^N\sum_{\mu=1}^p
\xi_i^\mu\xi_j^\mu S_iS_j\,,\qquad S\in\SS\,,
\eqno(\hamilt)
$$
where $\xi$ is a $p\times N$ matrix whose elements $\xi_i^\mu$ are
independent random variables with values $\pm 1$ and mean zero.
The quantity considered here is the expected value of the free energy density
$$
f_{p,N,\beta}(\xi)=-{1\over\beta N}\ln\Bigl[\sum_{S\in\SS}
e^{-\beta H_{p,N,\xi}(S)}\Bigr]
\eqno(\efree)
$$
at positive inverse temperatures $\beta$.
In order to state our result, we define
$$
\phi_\delta(\beta,h)=-{1\over\beta}\ln\bigl[2\cosh(\beta h)\bigr]
+{1-\delta\over 2}h^2\,.
\eqno(\fdelta)
$$
\vskip.1in\noindent{\bf Theorem.\ }
{\it Let $0\le\alpha<1$ and $\delta<1$
such that $\delta-4\sqrt{\alpha}(1-\delta)$ is positive. Then }
$$
\min_{h\in\real}\phi_\delta(\beta,h)
+{\alpha\over 2\beta}\ln[\delta-4\sqrt{\alpha}(1-\delta)]
\le\lim_{\scriptstyle N,p\to\infty\atop\scriptstyle p/N\to\alpha}
\mean f_{p,N,\beta}(\xi)
\le \min_{h\in\real}\phi_0(\beta,h)\,.
\eqno(\ulbound)
$$
\advance\vsize by 26pt
\medskip\noindent{\bf Remarks.\ } The upper bound in (\ulbound) is well known:
It is the free energy density of the Curie--Weiss model,
which is obtained by dropping all terms with $\mu>1$
from the Hamiltonian (\hamilt). For $\alpha=0$ the lower bound in (\ulbound)
coincides with the upper bound as $\delta\downarrow 0$. In this case
we recover a recent result of Shcherbina and Tirozzi [\rSTa].
For $\beta<1$, the lower bound with $\delta=1-\beta$
agrees with the correct free energy density up to $\OO(\alpha^{3/2})$;
see e.g. the references [\rAGS] and [\rSTb].
\bigskip\noindent{\bf Proof.\ } Consider the overlap matrix $A$,
$$
A_{\mu\nu}={1\over N}\sum_{i=1}^N\xi_i^\mu\xi_i^\nu-\delta_{\mu\nu}\,,
\qquad \mu,\nu=1,\ldots,p.
\eqno(\amunu)
$$
An estimate in [\rSTa] implies that there exists a real number $\lambda$
and an even positive integer $n$, both depending on $N$, such that
$$
\lambda^{-n}\mean\tr A^n\le\oo(N^{-1})\,,\qquad\lambda=4\sqrt{\alpha}+\oo(1)\,,
\eqno(\abound)
$$
as $N\to\infty$. A stronger version of this estimate (inequality (\etran)),
which could be of independent interest, will be proved below.
Assume now that $\alpha$ and $\delta$ satisfy the hypothesis of the theorem,
and that $N$ is sufficiently large such that $\delta-\lambda(1-\delta)>0$.
Denote by $\langle.,.\rangle$ the standard inner product on $\real^p$,
and denote by $\xi_i$ the $i$--th column of the matrix $\xi$. If $\xi$ is such
that the operator norm of $A$ is less than or equal to $\lambda$,
we get an upper bound on the sum in (\efree) from the identity
$$
\eqalign{
\sum_{S\in\SS}e^{-\beta H_{p,N,\xi}(S)}
&=\intdpm e^{-\half N\beta\langle m,m\rangle}
\sum_{S\in\SS}e^{\beta\sum_{i=1}^N\langle m,\xi_i\rangle S_i}\cr
&=\intdpm e^{-\half N\beta\langle m,m\rangle}
\prod_{i=1}^N2\cosh\bigl(\beta\langle m,\xi_i\rangle\bigr)\cr
&=\intdpm e^{-\half N\beta\langle m,[\delta+(1-\delta)A]m\rangle
-\beta\sum_{i=1}^N\phi_\delta(\beta,\langle m,\xi_i\rangle)},\cr}
\eqno(\pfun)
$$
if we replace the matrix $A$ by $-\lambda$
and the function $\phi_\delta(\beta,.)$ by its minimum value.
The resulting lower bound on the free energy density
can be extended to all patterns $\xi$, by adding to it
the trivial bound $-[p/2+const]$, multiplied by the positive factor
$\lambda^{-n}\tr A^n$ which is larger than $1$ whenever $||A||>\lambda\,$:
$$
f_{p,N}(\beta,\xi)\ge\min_{h\in\real} \phi_\delta(\beta,h)
+{p\over 2\beta N}\ln[\delta-\lambda(1-\delta)]
-[p/2+const]\lambda^{-n}\tr A^n\,.
\eqno(\lbound)
$$
The assertion now follows from (\lbound) and (\abound).
To complete the proof of our theorem we will now
derive a bound that implies (\abound). Let $n\ge 4$.
Then $N^n\mean\tr A^n$ is equal to the number of ordered products
$$
\Pi_0\equiv\xi_{i_1}^{\mu_1}\xi_{i_1}^{\mu_2}\xi_{i_2}^{\mu_2}\xi_{i_2}^{\mu_3}
\cdots \xi_{i_{k-1}}^{\mu_k}\bigl\{\xi_{i_k}^{\mu_k}
\cdots \xi_{i_l}^{\mu_l}\bigr\}\xi_{i_l}^{\mu_{l+1}}
\cdots \xi_{i_n}^{\mu_n}\xi_{i_n}^{\mu_1}\,,
\eqno(\pizero)
$$
(ignoring the braces)
with $1\le\mu_k\le p$ and $1\le i_k\le N$ for all k,
subject to the constraint that $\mu_1\ne\mu_2\ne\ldots\ne\mu_n\ne\mu_1$,
and that each random variable $\xi_i^\mu$ appears an even number of times.
Such products will be referred to as {\it contributing} products.
Note that the constraint implies that the cardinalities
$u=|\{\mu_1,\ldots,\mu_n\}|$ and $v=|\{i_1,\ldots,i_n\}|$
satisfy $u+v\le n+1$ and $2v\le n$.
A contributing product will be called {\it simple}
if its $2n$ factors can be grouped into $n$ pairs
of identical random variables in such a way that
if each pair is marked by braces as shown in equation (\pizero)
for the pair $(\xi_{i_k}^{\mu_k},\xi_{i_l}^{\mu_l})$,
then the ``sets'' corresponding to different pairs
are either nested or disjoint.
Note that such pairings can be specified by giving only the $n$ left braces.
Thus, since each pairing determines whether or not any two indices
can have different values, the number of simple products can be bounded by
$$
c_n(p)\equiv{\textstyle{2n\choose n}}
\max_{\scriptstyle u+v\le n+1\atop\scriptstyle v\le n/2} p^uN^v\,.
\eqno(\cnofp)
$$
Assume now that $\Pi_0$ is a non--simple contributing product.
Below we will give a procedure for cutting $\Pi_0$
between some top--linked pairs
(adjacent factors that have a common upper index,
or the first and last factor) and regrouping
the pieces into a simple product $\Pi_m$ that has the following property $P$:
{\it The first factor of $\Pi_m$ is the same as that of $\Pi_0$;
and if $\omega$ is any closed walk on the factors of $\Pi_m$
such that every odd--numbered (even--numbered) step
is between two factors that are top--linked in $\Pi_m$ ($\Pi_0$),
then, with one possible exception,
all pairs connected by an odd--numbered step of $\omega$
are pairs of identical factors.}
In particular, if we only consider walks whose first step goes to the right,
and for which all but the last (or all) odd--numbered steps
are between identical pairs, then we can find a set $W$ of walks of this type,
such that every factor of $\Pi_m$ is visited by exactly one walk in $W$,
and only once.
Thus, it is possible to encode the original product $\Pi_0$
in a modified version $\Pi_m^\prime$ of $\Pi_m$, where, along each walk
$(\omega_0,\omega_1,\ldots,\omega_\ell=\omega_0)\in W$ of length $\ell\ge 4$,
the upper index in the identical factors $\omega_{2k-2}$ and $\omega_{2k-1}$
has been replaced by a pointer to the factor $\omega_{2k}$,
for $1\le k<\ell/2$. Since the upper indices of $\Pi_m^\prime$ can take
at most $p+2n$ different ``values'', it follows that
the number of contributing products is bounded by $c_n(p+2n)$, and hence
$$
\mean {\rm tr} A^n \le N^{-n}c_n(p+2n)
\le 4^n(p+2n)\bigl({p+2n\over N}\bigr)^{n/2}\,,
\qquad\qquad p+2n\le N.
\eqno(\etran)
$$
We get (\abound) by taking e.g.
$\lambda=4(1+n^{-1/2})\bigl({p+2n\over N}\bigr)^{1/2}$
with $n=$ twice the integer part of $\sqrt{N}$.
Finally, to construct $\Pi_m$ from $\Pi_0$ we iterate the following step.
Assume that $\Pi_{m-1}$ is non--simple. Then, in this product,
it is possible to find an odd number $\ge 3$ of consecutive factors
$\xi_{i^\prime}^\mu\xi_i^\mu\cdots\xi_{j^\prime}^\mu$ such that
$\xi_{j^\prime}^\mu$ is top--linked to a factor $\xi_j^\mu$
that is identical to $\xi_i^\mu$, with $i\ne i^\prime$ and $j\ne j^\prime$.
After choosing such a sub--product, we now reverse the order of the factors
$\xi_i^\mu\cdots\xi_{j^\prime}^\mu$ in $\Pi_{m-1}$
and define $\Pi_m$ to be the result of this operation.
Note that this creates
a new top--linked pair $(\xi_i^\mu\,,\xi_j^\mu)$ of identical factors.
Thus, the iteration ends (with a simple product) after less than $n$ steps.
It is easy to check
that each step preserves the abovementioned property $P$,
and that contributing products are mapped to contributing products.
% ----------------------
\bigskip\noindent
{\bf References}\hfil\break
\smallskip
\item{[\rAGS]} D.J.~Amit, H.~Gutfreund, and H.~Sompolinsky, {\it
Spin--Glass Models of Neural Networks},
Phys.~Rev.~A {\bf 32}:1007 (1985).
\item{[\rSTa]} M.~Shcherbina and B.~Tirozzi, {\it
Exact Mean Field Theory for Some Hopfield Model},
\break Preprint (1991).
\item{[\rSTb]} $X$.~Scacciatelli and B.~Tirozzi, {\it
Fluctuation of the Free Energy in the Hopfield Model},
\break Preprint (1991).
\bye