\documentstyle[11pt]{article}

%%%%%%%%%%%%
\def\a{\alpha}
\def\one{{\bf 1}\hskip-.5mm}
\def\ind{{\bf 1}\kern-1.1mm{\rm I}}
\def\P{{I\kern-0.3emP}}
\def\Q{{\P_A}}
\def\pa{{\P(A)}}
\def\fa{{f_A}}
\def\ga{ {\frac{1}{\pa}} }
\def\fp{{\fa\pa}}
\def\pal{{\P(A^{(l)})}}
\def\la{ \lambda_{A} }
\def\xa{ \xi_{A} }
\def\Zeta{ \Psi }
\def\laf{ \lambda_{A,f} }
\def\lafa{ \lambda_{A,f_A} }
\def\lak{ \lambda_{A,k} }
\def\lam{ \lambda_{A,\mu} }
\def\xaf{ \xi_{A,f} }
\def\xak{ \xi{A,k} }
\def\xam{ \xi_{A,\mu} }
\def\xamt{ \xi_{A,\mu(t)} }
\def\lab{ \bar\lambda_{A} }
\def\sik{ \sigma_{A,k} }
\def\sin{ \sigma_{A,n} }
\def\sif{ \sigma_{A,f} }
\def\zas{ \zeta_{A,s} }
\def\za{ \zeta_{A,s} }
\def\del{ \delta }
\def\Del{ \Delta }
\def\maj{ 2 \Delta \pa + *(\Delta-n) }
\def\ta{{\tau_A}}
\def\bbz{{Z\kern-0.45emZ}}
\def\bbr{{I\kern-0.3emR}}
\def\bbn{{I\kern-0.3emN}}
\def\bbe{{I\kern-0.3emE}}
\def\ofpt{(\Omega ,{\cal F}, \P,T)}
\def\dyn{(T,{\cal F},\P )}
\def\C{{\cal C}_n}
\def\ea{{\P(A)^{1 \over 2}}}
\def\lp{ \left( }
\def\rp{ \right) }
\def\ll{ \left\{ }
\def\rl{ \right\} }
\def\lc{ \left[ }
\def\rc{ \right] }
\def\lv{ \left\vert }
\def\rv{ \right\vert }
\def\beq{ \begin{displaymath} }
\def\eeq{ \end{displaymath}  }
\def\beqn{ \begin{equation} }
\def\eeqn{ \end{equation}  }
\def\beqa{ \begin{eqnarray*} }
\def\eeqa{ \end{eqnarray*} }
\def\beqan{ \begin{eqnarray} }
\def\eeqan{ \end{eqnarray} }
\def\nn{ \nonumber }
\def\vs{ \vskip4truemm }
\def\ds{ \displaystyle }
\def\ts{ \textstyle }
\def\ps{ \scriptstyle }
\def\cross{\times \;}
\def\wt{\widetilde}
\def\go{\rightarrow}
\def\p{{\cal P}}
\def \Sup{\mathop{\rm Sup}}
\def\proba{\mathchoice{\rm I\hskip-1.9pt P}{\rm I\hskip-1.9pt 
P} 
{\rm I\hskip-.9pt P}{\rm I\hskip-1.9pt P}}
\def\proof{\noindent{\bf Proof. }}

\def\Lemma(#1){Lemma \letichetta(#1)\hskip-1.6truemm}
\def\Theorem(#1){{Theorem \letichetta(#1)}\hskip-1.6truemm}
\def\Proposition(#1){{Proposition \letichetta(#1)}\hskip-
1.6truemm}
\def\Corollary(#1){{Corollary \letichetta(#1)}\hskip-
1.6truemm.}
\def\Remark(#1){{\noindent{\bf Remark \letichetta(#1)\hskip-
1.6truemm.}}}
\let\ppclaim=\plainproclaim
\let\EQS=\label\let\EQ=\label
\let\eqs=\eq
\let\labelas=\labela
\let\eqas=\eqa


\title{Inequalities for the occurrence times of rare events in mixing 
processes. The state of the art.%
  \thanks{Work done within the Projeto Tem\'atico ``Rhythmic patterns,
    parameter setting and language change'', supported by FAPESP (grant
    98/3382-0, and as part of the activities of the Pronex Project
    ``Critical phenomena in probability and stochastic processes''
    (grant 41.96.0923.00)}}
\author{Miguel Abadi%
\thanks{Supported by CAPES (Ph.D. grant)}
\and  Antonio Galves%
\thanks{Work partially supported by CNPq (research grant 301301/79)}
}


\bibliographystyle{plain}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{document}
\maketitle

\rightline {\sl Dedicated to Joel Lebowitz}
\rightline {\sl
 on his 70th anniversary}

\begin{abstract} The first occurrence time of a rare event in a mixing process
  typically has a distribution which can be well approximated by the
  exponential law. In this paper we review recent theorems giving upper bounds
  for the error term of this approximation. We shall focus on papers treating
  the problem in a general mixing framework.
\end{abstract}

{\bf Running title}: Rare events in mixing processes.

{\bf Keywords}: Mixing processes,
occurrence time of a rare event, exponential approximation.

{\bf AMS subject classification}: 60G10, 60F05, 37A50


\section{\bf Introduction.}
The first occurrence time of a rare event in a mixing process typically has a
distribution which can be well approximated by the exponential law. In this
paper we review recent theorems giving upper bounds for the error term of this
approximation. We shall focus on papers treating the problem in a general
mixing framework.

The pioneer paper in this area is D\"oeblin (1940), who studied the Poisson
approximation for the Gauss transformation.  In the context of Markov chains,
the convergence of the occurrence time of a rare event to the exponential law
was first studied by Bellmann and Harris (1951) and Harris (1953). Then after
a long period in which apparently nothing appeared in the area, several papers
and books studying the problem for different types of Markov processes were
published, starting with Keilson (1979), Aldous (1982), Korolyuk and
Sil'vestrov (1984), Cassandro, Galves, Olivieri and Vares (1984), Kipnis and
Newman (1985), Cogburn (1985), Lebowitz and Schonmann (1987), Olivieri and
Scoppola (1995) and (1996) among others.  In the context of dynamical systems
the question was considered by Galves and Schmitt (1990), Pitskel (1991),
Collet, Galves and Schmitt (1992), Collet and Galves (1993), Hirata (1993) and
Collet and Galves (1995).  Part of these papers were motivated by the modeling
of physical phenomena like metastability or intermittency. With the exception
of Aldous (1982), all the others are only interested in a qualitative result,
and don't present upper bounds for the exponential approximation.

In the nineties, several papers presented bounds for the error of the
exponential approximation. Aldous and Brown (1992, 1993) studied the case of
reversible and finite Markov chains. Results for different types of
interacting Markovian systems appeared in Ferrari, Galves and Landim (1994),
Ferrari, Galves and Liggett (1995), Asselah and Dai Pra (1997), Simonis
(1998).

The subject is considered in the context of different types of mixing
processes in recent papers by different authors. Galves and Schmitt (1997)
consider $\phi$-mixing dynamical system with respect to a summable function
$\phi$.  Collet, Galves and Schmitt (1999) consider one-dimensional Gibbs
states, with H\" older continuous potentials. Haydn (1999) worked with
rational maps on the Riemann sphere. Paccaut considers the case of weighted
maps of the interval. Hirata, Saussol and Vaienti (1999) prove the Poisson
approximation for the successive entrance times of a rare event.  Their proof
holds in a very general framework and provides sharp bounds for sets which
does not recur very fast.  Finally the recent paper by Abadi (2000) proves the
exponential approximation for any $\phi$-mixing processes and also for
$\alpha$-mixing processes with summable $\alpha$. As far as we know this is
the best result available in the field.

This paper is organized as follows.  The basic definitions and notation is
presented in section 2. Then in the next sections we review Aldous (1982),
Aldous and Brown (1992), Galves and Schmitt (1997), Collet, Galves and Schmitt
(1999), Hirata, Saussol and Vaienti (1999) and Abadi (2000). 

\section{\bf Basic definitions and notation.}


Let ${\cal E}$ be a finite set. Put $\Omega={\cal E}^\bbz$.  For each $n \in
\bbz$, define $X_n:\Omega \rightarrow \Re$ the $n$-th coordinate projection.
We denote by $T:\Omega \rightarrow \Omega$ the one-step left shift operator.

We denote by ${\cal F}$ the $\sigma$-algebra over $\Omega$ generated by
cylinders. Moreover we denote with ${\cal F}_{I}$ the $ \sigma$-algebra
generated by cylinders with coordinates in $I$, $I \subseteq\bbz$.

Let ${\cal C}_n$  be the set of all cylinders $A$ of the form 
N\beq
A = \{ X_0 = a_0, \dots , X_{n-1} = a_{n-1} \} \ , 
\eeq
with $a_i \in {\cal E}, \; i=1, \dots ,n$. 

Let $\P$ be a probability measure  over ${\cal F}$, such that
\beq
\P(A)=\P(T^{-1}(A))\ , 
\eeq
for all $A \in {\cal F}$. This is called a {\sl stationary} measure in the literature.
To avoid uninteresting pathologies we shall also assume that 
\beq
0 <\P\{ X_n = a\}<1 \ , 
\eeq
for all  $a \in {\cal E}$.


We shall say that 
$\ofpt$ is $\gamma${\sl -mixing} or uniform mixing if, for all integers 
$n \ge 1$ and $l \ge 0$, the following inequality holds 
\beq
\sup_{B \in {\cal F}_{\{0,.,n\}}, C \in {\cal F}_{\{n\ge 0\}} }
\lv \P\lp B \cap T^{-(n+l+1)}C\rp - 
\P(B)\P(C)\rv =
\gamma(l) \ , 
\eeq
$\alpha${\sl -mixing} if
\beq
\sup_{B \in {\cal F}_{\{0,.,n\}}, C \in {\cal F}_{\{n\ge 0\}} }
\frac{\lv \P\lp B \cap T^{-(n+l+1)}C\rp - 
\P(B)\P(C)\rv}{\P(B)} =
\alpha(l) \ , 
\eeq
and $\phi${\sl -mixing} if 
\beq
\sup_{B \in {\cal F}_{\{0,.,n\}}, C \in {\cal F}_{\{n\ge 0\}} }
\frac{\lv\P\lp B \cap T^{-(n+l+1)}C\rp - 
\P(B)\P(C)\rv}{\P(B)\P(C)} =
\phi(l) \ , 
\eeq
where in the above expressions the supremum is taken over the sets $B$ and
$C$, such that $\P(B)>0$ in the second case and such that $\P(B)\P(C)>0$ in
the third one, and $\gamma=(\gamma(l))_{l \ge 0}$, $\alpha =(\alpha(l))_{l \ge 0}$
and $\phi =(\phi(l))_{l \ge 0}$ are decreasing sequences of positive real
numbers converging to zero.


Clearly, $\phi$-mixing implies $\alpha$-mixing and this last one implies
uniform mixing.  We observe that an irreducible and aperiodic finite state
Markov chain is $\phi$-mixing with exponential decay.  Moreover, Gibbs states
on $\Omega={\cal E}^\bbz$, with a potential with summable variations are
exponentially $\phi$-mixing.

\vs
\noindent{\bf Remark.} In the literature the mixing conditions have received
different names. For example, Doukhan (1995) calls $\alpha$-mixing
(respectively $\phi, \psi$), what we call $\gamma$-mixing (respectively
$\alpha, \phi$).  Also, Shields(1996) use $\psi$-mixing to denote what we call
$\phi$-mixing. Our terminology is the same adopted by Galves and Schmitt
(1997) Collet, Galves and Schmitt (1999) and Hirata, Saussol and Vaienti
(1999).

\vskip4truemm

Given $A \in {\cal C}_n$, we define the {\sl entrance time} 
${\ta}: \Omega \rightarrow \bbn$ as follows
\beq
\ta(\omega) = \inf\{k \ge 0 : T^k(\omega) \in A \}\ . 
\eeq

We shall use the classic probabilistic shorthand notation for events defined
through random variables.  We shall write $\{\ta = m \}$ instead of $\{\omega
\in \Omega : {\ta}(\omega) = m \}$, and $T^{-k}(A)$ instead of $\{\omega \in
\Omega : T^k(\omega)\in A \}$.  As usual, the mean of a random variable $X$
will be called its {\sl expectation} and denoted by $\bbe(X)$.  We also denote
the conditional measure with respect to the event $A$ \beq \Q\ll W \rl = \P\ll
A \cap W \rl/ \P\ll A \rl\ , \eeq for any $W \subseteq {\cal F}$.

We conclude this section with some references. The nice book by Shields (1996)
is a wonderful introduction to mixing processes and to Ergodic Theory.  We
refer to Doukhan (1995) for examples and references of $\alpha$-mixing
processes that are not $\phi$-mixing and for $\phi$-mixing processes decaying
at any rate.  For definitions and basic properties of Gibbsian models we refer
the reader to Bowen (1975).


\section{\bf Aldous 1982.}

As far as we know, Aldous (1982) is the first paper which consider the problem
of estimating the error term in the exponential approximation.  The result
holds when $(X_n)$ is an irreducible Markov chain taking values in the finite
set ${\cal E}$.  Let us define $\eta$ as the relaxation rate of the chain,
namely, 
\beqn 
\eta := \min_{n \in \bbn} \ll \Vert \P\lp X_n=\,\cdot\, |
X_0=a\rp-\pi\lp\cdot\rp\Vert \le {1\over {e^2}} \;{\rm for \; all \;} a \in {\cal
  E} \rl \ , \label{eq:ald} \nn 
\eeqn 
where $\pi$ is the probability measure on ${\cal E}$ invariant with respect to
the chain and
\beq
\Vert \mu -\nu \Vert =
{1\over2} \sum_{a\in{\cal E}} |\mu(a)-\nu(a)|
\eeq
is the total variation norm.

The following theorem holds
\proclaim{\bf Theorem}. For any subset $A \subset {\cal E}$
\beq
\sup_{t > 0} \lv \P\ll \ta > t \rl
               - e^{\ds{- t/ \bbe(\ta) }}  \rv 
\le  \frac{C\eta}{\bbe(\ta)}
\lp 1+\log^+ \lp{\bbe(\ta)\over\eta} \rp\rp  \ . 
\eeq


A simple example will help understanding the behavior of this upper bound. 
Consider a  Markov chain $(X_n)_{n \in \bbn}$  with state 
space ${\cal E}=\ll a,b\rl$ 
and with transition probability matrix 
\beqa
Q = 
\lp
\begin{array}{ccc}
& 1-p & p  \\
    &   q & 1-q\\
\end{array} \rp
\eeqa
with $0<p+q<1$.
Let $A=\ll a\rl$. We have that $\pi(a) = q/(p+q)$. So, the interesting case is
when $p/q \go \infty$.
In this case the expectation of the hitting time is explicitly
computable and has the value
\beq 
\bbe\lp \ta \rp = {p \over (p+q) q }.  
\eeq
Moreover, an elementary computation shows that
\beq
\Vert \P\lp X_n=\,\cdot\, 
|X_0=a\rp-\pi\lp\cdot\rp\Vert=\left(1-(p+q)\right)^n\ .
\eeq 
It follows that
\beq
\eta \approx {-2 \over \log\lp 1-(p+q)\rp}\ . 
\eeq
Remembering that $p<<q$, we obtain the estimate
\beq
{\bbe(\ta)\over\eta} \approx {p \over q} \ ,
\eeq
and therefore the upper bound in (\ref{eq:ald}) is estimated by
\beq
\frac{C \ p}{q} \lp 1+ \log^+ \lp {q \over 2p} \rp \rp  
\approx -\pi(a) \log\pi(a) \ .
\eeq

Just for comparison a direct estimation of error term 
can be done, since the hitting time, conditioned on
$\ll \ta > 0 \rl $ has a geometric distribution with
parameter $1-q$.
The mean value theorem gives the following bound for
the error of the approximation.
\beq
\lv -\log(1-q) - {1\over\bbe(\ta)} \rv 
\ \max \ll -\log(1-q) \ ; \  {1\over\bbe(\ta)} \rl^{-1} 
\approx \pi(a) \ .
\eeq

\section{\bf Aldous and Brown 1992}

In a further paper Aldous and Brown (1992) study the hitting time of a rare
event $A \subset {\cal E}$ in the following setting.  $\ll X_t:t\ge 0\rl$ is a
continuous-time finite-state Markov chain with transition matrix $Q= (q(i,j);
i,j \in {\cal E} )$ and where $q(i,i)= - \sum_{i \not= j} q(i,j)$.  Here
$\eta= 1/\lambda_2$, where $ \lambda_2$ is the second eigenvalue of $-Q$.

 
\proclaim{\bf Theorem}. For any subset $A \subset {\cal E}$
\beqn
\sup_{t > 0} \lv \P\ll \ta > t \rl
               - e^{\ds{- t/ \bbe(\ta) }}  \rv 
\le  \frac{\eta/\bbe(\ta)}{1+ \eta/\bbe(\ta)}
\le   \eta/\bbe(\ta) \label{eq:aldous} \nn \  . 
\eeqn

\vs This upper bound is useful only if the relaxation time of the chain is
much smaller than the expectation of the occurrence time of the rare event.
\vs

An example can help understanding the type of difficulties one can find to
apply this upper bound. In Ferrari, Galves and Ligget (1995) there is an
upper bound for the symmetric simple exclusion process. The process can
be informally defined as follows. Infinitely many particles perform
interacting random walks on $\bbz$. At most
one particle is allowed in each site $x\in \bbz$ and, at rate one, the
contents of sites $x$ and $x+1$ are interchanged. Hence if both sites are
occupied or both sites are empty, nothing happens but if one of the sites is
occupied and the other is empty, the interchange is seen as a jump of the
particle to the empty site.  

The only extremal invariant measures for this system are the product measures
$\mu_\rho$, $\rho\in [0,1]$. Under the measure $\mu_\rho$, the probability
that a site is occupied by a particle is $\rho$ and the occupation variables
of different sites are independent random variables.

Consider the symmetric simple exclusion process over $\bbz$ starting with the
equilibrium product measure $\mu_\rho$ with density $\rho \in (0,1)$.  Let
$\tau_N$ the first time for which the interval $\{1, \dots , N\}$ is totally
occupied. Since the symmetric simple exclusion process is reversible with
respect to $\mu_\rho$, it is tempting to apply Aldous and Brown approach to
obtain the error for the law of $\tau_N$.  Unfortunately the process takes
values on $\{0,1\}^{\bbz}$ which is not finite, and therefore a direct
application of the result of Aldous and Brown is excluded.

A possible way to overcome this difficulty is to approximate the infinite
particle system by a sequence of symmetric simple exclusion processes
taking values on the finite sets $\{0,1\}^A_N$, where $A_N =[-\ell_N,
N+\ell_N]$ with any boundary condition that leaves invariant the measure
$\mu_\rho$ restricted to the box. We need to estimate $\eta_N/ \bbe(\tau_N)$
where $1/\eta_N$ is the second eigenvalue of the continuous time Markov
chain in the finite box.

To apply this to obtain the result for the infinite system we need to take a
box of length $\ell_N=(\bbe\tau_N)^{(1+\varepsilon)/2}$ for some
$\varepsilon>0$.  This is necessary to guarantee that the time of occurrence
of the rare event for the finite and the infinite systems behave in the same
way. Indeed the influence of the boundary behaves {\it at~least} as a simple
symmetric random walk, and it takes a time of the order of the square of the
length of the box to arrive to the center of the box. On the other hand, Lu
and Yau (1993) proved that $\eta_N$ is of the order at least of the square
of the length of the box.  Hence the upper bound in (\ref{eq:aldous}) diverges
and the finite approximation cannot be used naively.


Actually, Ferrari, Galves and Ligget (1995) presents the following upper bound

\proclaim Theorem. There exists positive constants $\a'$, $\a''$, $A$ and $A'$
independent of 
$N$ and a sequence $\a_N\in[\a',\a'']$ such that 
$$
\sup_{t\ge 0}\left\vert 
P\left\{\a_N\rho^N\tau_N >t\right\} - e^{-t} \right\vert 
\le A \rho^{A'N}.
$$
We remark that the scaling factor $\rho^N$ is just the probability of the
rare event by $\mu_\rho$. This is suggested by Kac`s Lemma(cf. \cite{Kac})
which says that the expectation of the first return time to a given event is
just the inverse of the probability of the event.

The approach followed by Ferrari, Galves and Ligget (1995) was successively
used by Asselah and Dai Pra (1997) to obtain the error term for more general
types of rare events for the symmetric simple exclusion process and by Simonis
to study the same type of rare event for the contact process. This approach was
also the starting point of the result by Galves and Schmitt (1997).


\vskip 2truemm
\section{\bf Galves and Schmitt 1997}


This paper considers the general setting of $\phi$-mixing processes, with
$\phi$ summable. The result holds for any type of cylinder set $A$. The scaling
factor is $\la\pa$
with $\la$ belonging to an interval bounded away from zero (an containing the
number one). The result is the following.
\vskip 2truemm

\proclaim{\bf Theorem}. Let
$\ofpt$ be $\phi$-mixing and let us
suppose that the function  $\phi:{\bbn}\rightarrow {\bbr}_{+}$ is
summable. Then, there exist four strictly positive constants $ {\Lambda}_1,
{\Lambda}_2, \beta, C$,  such that for any $n$ and any
$A \in {\cal C}_n$, there exists
$ \lambda(A) \in \left[\Lambda_1, \Lambda_2 \right] $, for which the
following inequality holds
\beqn
\sup_{t > 0}\left\vert \P\left\{\ta > {t \over {\lambda(A) \pa}}\right\}
- e^{\ds{- t}}\right\vert \le C \P(A)^{\beta} \  . \label{eq:gasc} \nn
\eeqn

\vskip4truemm 

The proof of the theorem has two parts.  First it is shown a
factorization property holds for $\P\ll \ta >\fa t\rl$, namely
\beq
\P\ll \ta > \fa (t+s) \rl=
\P\ll \ta > {\fa t }\rl\P\ll \ta > {\fa s }\rl 
\eeq
for all $t$ and $s$ in $\Re_+$, where $\fa$ is a suitable scaling factor.
This will follows as a consequence of the mixing property, if the event $\ll
\ta > {\fa(t+s) }\rl$ is replaced by the event which says that the cylinder $A$ is
not visited between times $0$ and $\fa t$ and between $\fa t +\Del$ and
$\fa(t+s)$, where $\Del$ is a suitable gap.
The game here is to compensate the length of the gap with the 
loss of memory of the process.


The second part of proof is to choose the right scaling factor $\fa$, in such
a way that the limit is a non-degenerated distribution.

A natural question suggested by the theorem is the precise value of the
parameter $\la$.  Kac's lemma suggests that $\la=1$. Collet, Galves and
Schmitt (1999) shows that this is typically the case.

\vskip 2truemm
\section{\bf Collet, Galves and Schmitt 1999}

Collet, Galves and Schmitt (1999) shows that the correction factor depends on
the short time returns of the rare event to itself in the firsts steps. As a
consequence, for a overwhelming set of cylinders the scaling factor $\la$ in
(\ref{eq:gasc}) is asymptotically one, as suggested by Kac's lemma. 

\vskip 4truemm 

\noindent{\bf Definition.} 
Let $s \in \bbn$. Define ${\cal B}_{n}= {\cal B}_{n}(s)$ as the set of
sequences of length $n$ of elements in ${\cal E}$ which recur before time
$n/s$, namely, $A \in {\cal B}_{n}$ if and only if there is an integer $j$, $
1\le j \le n/s$ such that $A \cap T^{-j}A \not= \emptyset$.

\vskip 4truemm 
\proclaim{\bf Theorem}.
Let $\ofpt$ be exponentially $\phi$-mixing and let us
suppose that the function  $\phi:{\bbn}\rightarrow {\bbr}_{+}$ is
summable. Then, there exist two strictly positive constants $c, C$,
such that for any $n$ and any
$A \in {\cal C}_n / {\cal B}_n$, there exists
$ \lambda(A) \in \left[\Lambda_1, \Lambda_2 \right] $, for which the
following inequality holds
$$
\sup_{t > 0} \lv \P\ll \ta > {t \over  \pa}\rl
               - e^{\ds{- t}} \rv \le C e^{-cn} \  . 
$$
Moreover, 
\beq
\P\big\{ {\cal B}_{n}(3)\big\}\le Ce^{-cn}\;. 
\eeq


\section{\bf Hirata, Saussol and Vaienti 1999}

This paper shows that the distribution of the first hitting time into 
$A$ and the first return time to $A$, suitably rescaled, 
are close to an exponential one law if and only if 
both  distributions  are mutually close in the following sense,

\vs
\proclaim{\bf Theorem}.
Let us define $c(k,A) = | \Q(\ta>k) - \P(\ta>k) |$
and set $c(A) = \sup_k c(k,A)$.
The distribution of the rescaled first hitting time into the set $A$
differs from the exponential-one law up to
$d(A) := 4 \pa+ c(A) (1+\log c(A)^{-1})$, namely~:
\beq
\sup_{t\geq 0} 
\left| \P \lp \ta > \frac{t}{\pa} \rp - e^{-t} \right|
\leq d(U)
\eeq
and the same holds starting from $A$~:
\beq
\sup_{t\geq 0} 
\left| \Q \lp \ta > \frac{t}{\pa} \rp - e^{-t} \right|
\leq d(U).
\eeq
Conversely, the difference between the rescaled hitting time and the 
rescaled return time can be bounded in terms of the distance 
$\wt{c}(A):= \sup_{t\geq 0} | \Q(\ta>t/\pa) - e^{-t}|$,
namely,
\beq
c(A) \leq 2\pa + \wt{c}(A) ( 2 + \log \wt{c}(A)^{-1}) \ .
\eeq

This theorem gives a {\em necessary and sufficient condition} to obtain the
exponential law for the first hitting time and the first return time. That is
$c(A) \to 0$, if and only if $\wt{c}(A) \to 0$.  The proof holds for any
stationary measure $\P$, without any assumption with respect to its
ergodicity.  It is clear that with this is generality we can not expect to
assure the convergence of $c(A)$. The next lemma gives an upper bound for
$c(A)$, which indicates when we can expect convergence.

\vs

\proclaim{\bf Lemma}.
Let $A\subset \Omega$ a measurable set. 
The following estimate holds
\beq
c(A) \leq \inf_{k\in\bbn} \ll a_k(A) + b_k(A) + k \pa \rl
\eeq
where the quantities are defined by
\beqa
a_k(A) &=& \Q ( \bigcup_{j=1}^k T^{-j}A )= \Q(\ta \leq k) , \\
b_k(A) &=& \sup_{W\in {\cal F}_{\{0,.,n\}}} | \Q(T^{-k}W) - \P(W)|
\eeqa

\vs

Note that $b_k(A)= \alpha(k-n)$, if the process is $\alpha$-mixing, and
$b_k(A)= \gamma(k-n)/\pa$, if the process is $\gamma$-mixing.

\vs

The key element in this upper bound is the quantity $a_k(A)=\Q(\ta \leq k)$.
For instance, if $A \in {\cal C}_n /{\cal B}_n(s)$, then $a_k(A)=0$ for
$k=1,...,s$. If the process looses memory fast enough, then $b_k(A)$ decreases
to $0$ and the theorem assures the asymptotic convergence to the exponential
law.

\vs 

It is important to remark that in many situations $c(A)$ does not converge to
zero.  The classical Ehrenfest model provides an example of this fact. The
Ehrenfest model is a Markov chain, taking values on the set ${\cal
  E}=\{0,1\}^d$, where $N$ is a positive integer. For any $x \in {\cal E}$ and
any index $i \in \{1,\ldots,N\}$, the state $x^i$ is defined as the string
which coincides with $x$ in all the coordinates with the exception of
coordinate $i$. We define the transition probabilities of the chain as follows
\beq Q(x, y)= \ll
\begin{array}{cc}
{1\over 2N} & \hbox{if\ \ } x^i=y\ , \hbox {\ \ for some\ \ } i=1, \dots,N \ ,
\\ 
  {1\over2} & \hbox{if\ \ } x=y\ ,   \\ 
         0\  & \hbox{otherwise}\ .
\end{array}
\right.
\eeq
The invariant measure for the chain is the uniform distribution on ${\cal E}$.
Now consider the event $A=\{(1,\dots,1)\}$. If $N$ is  large this is a rare
event. Let us suppose that the chain starts with its invariant  measure and
define $\tau_A$ as the first time the chain visits $A$.

Then a straightforward computation gives that,
\beqa \Q(\ta > 1) &=& 1\over{2} \eeqa
and 
\beqa \P(\ta > 1) &\geq& 1-{{N+1}\over{2N}} \   .\eeqa 
Therefore, $c(1, A)$ converges to $1/2$, when $N$ diverges.
\vs

Hirata, Saussol and Vaienti(1999) and Collet, Galves and Schmitt(1999) prove
that $\pa$ is the right scaling factor for cylinders which do not recur too
fast. Galves and Schmitt(1998) prove that the exponential approximation is
true {\it for all} cylinders using the scaling factor $\la\pa$, where the
possibility that $\la=1$ is not excluded. Therefore the question remains. Is it
true that the parameter $\la$ can be different from $1$? In Abadi (2000) a
computable estimation of this value is presented and it is shown that $\la$
can be really different from $1$.
   

\section{\bf Abadi 2000}


Abadi (2000) presents a modification of Galves and Schmitt's approach and
obtains a sharper upper bound for the exponential approximation.  The
improvement is obtained in part by changing the scaling factor.  Moreover, it
is shown that the scaling factor can be written as $\xamt \pa$, where $\xamt$
is bounded below and above by two strictly positive constants $\Xi_1$ and
$\Xi_2$, respectively, independent of $A, n$ and $t$.  This technique works
for all type of cylinders, for any $\phi$-mixing processes and also for
$\alpha$-mixing processes with summable $\alpha$.

Kac's lemma says that the right scaling factor for return
times is $\pa$. This suggests to use $\pa$ as the scaling factor for hitting
times.  We show that this is not always a good choice for first occurrence
times.


In the main theorem of this paper it is proven that a better rate of
convergence can be  obtained by rescaling the first occurrence time by
a certain correction factor and letting this factor
have some (bounded) fluctuations. 
 
\vskip 2truemm
\proclaim{\bf Theorem 1}. Let $\ofpt$ be 
$\phi$-mixing or $\alpha$-mixing with $\alpha$ summable. 
Then, there exists strictly positive constants $\Xi_1, 
{\Xi}_2, C_1 $ and $C_2$ 
such that for any $n, A \in {\cal C}_n$ and $t>0$ there exists 
$\xamt\in \left[\Xi_1, \Xi_2 \right] $, 
for which the following inequality holds 
\beq
\sup_{t > 0} 
\lv \P \ll \ta > {\frac{t}{\xamt\pa}} \rl  - e^{- t} \rv 
\le   C_2 \inf_{\Delta\ge n} \left[ \maj  \right] \ .
\eeq 
where * stands for $\alpha$ or $\phi$.
Moreover $e^{-\xamt\pa t}$ is left-continuous non-increasing 
function of $t$ over  each $ [k/\pa , (k+1)/\pa]$ and for all $k \in \bbn$
\beq
\lv \lim_{t\rightarrow \lp \frac{k+1}{\pa} \rp^-}  e^{ -\xi_{A,\mu(t)}} - 
                              e^{-\xi_{A,\mu\lp(k+1)/\pa\rp } }  \rv
\le \maj \ .
\eeq

The use of $\xamt \pa$ as scaling factor provides a good approximation to the
exponential law. However an explicit computation of this scaling factor is
usually a most difficult task.  To overcome this real difficulty it is
presented a computable approximation $\za$ for this parameter in terms of the
short recurrence times of each cylinder.  A corollary presents conditions on
the process and on the cylinders, assuring that $\za=1$. In particular, it is
proven that $\za=1$ for an overwhelming set of cylinders.  Finally the paper
presents a class of examples in which the parameter is arbitrarily small.

\vskip 4truemm

The proof uses the following extensions of the result in
Galves and Schmitt(1997).

\vskip 4truemm
\proclaim{\bf Theorem 2}. Let $\ofpt$ be $\phi$-mixing. 
Then, there exists strictly positive constants ${\Lambda}_1, 
{\Lambda}_2,
C_1 $ and $C_2$ 
such that for any $n$ and any $A \in {\cal C}_n$, there exists 
$\la\in \left[\Lambda_1, \Lambda_2 \right] $, 
for which the following inequality holds 
\beq 
C_1 \P(A) \le \sup_{t > 0} 
\lv \P \ll \tau_A > {\frac{t}{\la\pa}} \rl  - e^{- t} \rv 
\le   C_2 \, b(A)
\eeq
where $b(A)=\pa^\eta$, $0<\eta<1$ if $\phi(n)\le C/n^{\epsilon}$ 
for all $n>n_o$ with $\epsilon > 0$ and $b(A)=\phi^{1/2}(n)$ otherwise. 
\vskip 4truemm

Then  upper bound in Galves and Schmitt(1997) is
$\pa^\beta$ with $0<\beta<1$ for $\phi$ summable systems.
Theorem 2 extends that result
for $any$ type of decreasing function $\phi$, and improves the 
upper bound.


\vskip 4truemm
\proclaim{\bf Theorem 3}. Let $\ofpt$ be $\alpha$-mixing
and let us suppose that the function $\alpha: \bbn
\rightarrow \bbr_{+}
$ is summable. Then, there exists strictly
positive constants ${\Lambda}_1, {\Lambda}_2, C_1$, $C_2$ and $\kappa$ 
such that for any $n$ and any $A \in {\cal C}_n$, there exists 
$\lambda_A \in
\left[\Lambda_1, \Lambda_2 \right] $, for which the following 
inequality holds 

\beq
C_1 \P(A) \le \sup_{t > 0} \lv \P\ll\tau_A > 
{\frac{t}{\la\pa}}\rl
                            - e^{-t} \rv \le 
C_2  \pa^\kappa
\eeq

\vskip 2truemm 
We emphasize  that in both theorems the constants 
${\Lambda}_1, {\Lambda}_2, C_1, C_2, \eta$ and $\kappa$ are independent 
of $n$ and $A$. As usual,  $ {\Lambda}_1,{\Lambda}_2, C_1$, and $C_2$ stand
for (possibly) different constants in each theorem.

\vskip 2truemm 

The difference between theorem 1 and theorems 2 and 3 is that the first one
provides a more accurate upper bound, given that we use $\xamt$ instead of
$\la$ as a scaling  factor.

\vskip 2truemm
The next theorem gives an estimation of $\xamt$.
Let $s$ be a positive integer.
We define
\beq
\zas = \P_A \ll \ta >\frac{n}{s} \rl
    = \frac{\P   \ll \ta > \frac{n}{s}  \;  \cap  A  \rl}{\pa}  \ .  
\eeq


\proclaim{\bf Theorem 4}. Let $\ofpt$ be exponentially 
$\alpha$-mixing. Let $s$ be a positive integer. 
Then, there exist strictly positive constants ${\Zeta}_1, 
{\Zeta}_2, C_1, C_2$ and $c$ 
such that for any $n$ and any $A \in {\cal C}_n$, there exists 
$\za\in \left[\Zeta_1, \Zeta_2 \right] $, 
for which the following inequality holds 
\beq 
C_1 \P(A) \le \sup_{t > 0} 
\lv \P \ll \tau_A > t \rl  - e^{- t \za \pa} \rv 
\le   C_2 e^{-c n} \ .
\eeq


\vskip 4truemm
Now we prove that the cylinders for which Theorem 4
holds, are typical in the following sense

\proclaim{\bf Lemma}. Let the system be $\alpha$-mixing (at any rate).
There exist $s \in \bbn$ and  two
positive constants $C$ and $c$ such that 
\beq
\P\big\{ {\cal B}_{n}(s)\big\}\le Ce^{-cn}\;. 
\eeq


\vskip4truemm 
We now provide an example of a process and a sequence of 
cylinders $A_n$ such that $\lambda_{A_n} < 1$.

Let ${\cal E} = \{ 0,1 \}$ and $A=A_n = \{ X_0 =1,\dots,X_{n-1}=1 \} \;
\forall n \in \bbn $. Suppose that $X_n$ are i.i.d. with $\P ( X_n = 1 ) = p
$.  Then $\za=1-p$. Note that in this case, the result by Hirata, Saussol and
Vaienti gives $a_k(A_n)= 1-p$ for all $k=1, \dots, n-1$. Then, the hitting
rescaled by $\pa$ does {\sl not} converge in law to the exponential
distribution. To obtain we need to use $(1-p) \pa$ as scaling factor.

\vskip4truemm 
\noindent {\bf Acknowledgments.} It is a pleasure to thank Amine Asselah,
Xavier Bressaud, Pierre Collet, Pablo Ferrari, Nicolai Haydn, Fr\'ed\'eric
Paccaut, Benoit Saussol, Bernard Schmitt, Elisabetta Scoppola and Sandro
Vaienti for many illuminating discussions about the subject.  

The authors thanks the hospitality of the Centre de Physique Th\'eorique of
the CNRS, at Marseille-Luminy, and the organizers of the meeting {\sl Temps de
  retour, entropie et compl\'exit\'e} where parts of this paper have been
presented.


\begin{thebibliography}{10}

\bibitem{Abadi}
M.\ Abadi (2000). 
\newblock Exponential approximation for hitting times 
in mixing processes. 
\newblock{\sl Preprint IME-USP}.  Can be retrieved from 
{\sl http://rene.ma.utexas.edu/mp\_arc/c 
/00/00-211.ps.gz }

\bibitem{Ald}
D.\ J.\ Aldous (1982).
\newblock Markov chains with almost exponential hitting
times. 
\newblock{\sl Stochastic Process.\ Appl.\ \bf 13} 305-310.

\bibitem{Amine}
\newblock A.\ Asselah and P.\ Dai Pra (1997).
\newblock Sharp estimates for the occurrence 
time of rare events for simple symmetric exclusion.
\newblock {\sl Stochastic 
Processes Appl. \bf 71} 259-273.

\bibitem{AldBro1}
D.\ J.\ Aldous and M.\ Brown (1992).
\newblock Inequalities for rare events in
time-reversible Markov chains~I.
\newblock {\sl Stochastic Inequalities} IMS Lecture Notes {\bf 22} 1-16.

\bibitem{AldBro2}
D.\ J.\ Aldous and M.\ Brown (1993).
\newblock Inequalities for rare events in
time-reversible Markov chains~II. 
\newblock{\sl Stochastic Processes Appl. \bf 44} 15-25.


\bibitem{BellHar}
\ Bellman and T.\ E.\  Harris (1951).
\newblock Recurrence times for the Ehrenfest model.
\newblock {\sl Pacific\ J.\ Math. \bf 1} 179--193.

\bibitem{Bow}
R. \ Bowen (1975).
\newblock Equilibrium states and the ergodic theory of 
Anosov diffeomorphisms.
\newblock {\sl Lecture Notes in Math.} {\bf 470}.
\newblock Springer-Verlag, New York.

\bibitem{Cassandro}
\newblock M. Cassandro, A. Galves, E. Olivieri and M. E. Vares (1984). 
\newblock Metastable behavior of stochastic dynamics: a pathwise approach. 
\newblock {\sl J. Stat. Phys. \bf 35}:603-633.


\bibitem{Cog}
\newblock R. Cogburn (1985).
\newblock On the distribution of first passage and return times
for small sets. 
\newblock{\sl Ann.\ Probab.\ \bf 13} 1219-1223.

\bibitem{close}
\newblock P.\ Collet and A.\ Galves (1993).
\newblock Statistics of close visits to the indifferent
fixed point of an interval map. 
\newblock {\sl J. Stat.Phys.} {\bf 72}, 459-478.

\bibitem{expand}
\newblock P.\ Collet and A.\ Galves (1995) {\sl Asymptotic distribution of entrance
times for expanding maps of the interval}.
\newblock {\sl Dynamical Systems and
Applications}, WSSIAA {\bf 4}, 139-152.

\bibitem{intermit}
\newblock P.\ Collet, A.\ Galves and B.\ Schmitt (1992).
\newblock Unpredictability
of the occurrence time of a long laminar period in a model of temporal
intermittency. 
\newblock {\sl Ann.\ Inst.\ H.\ Poincar\'e, Section A, \bf 57} 3:319-331.


\bibitem{CGS}
\newblock Collet, A.\ Galves and B.\ Schmitt (1999).
\newblock Repetition times for gibbsian sources. 
\newblock{\sl Nonlinearity  \bf {vol. 12} } 1225-1237.


\bibitem{Doe}
\newblock W.\ D\"oeblin (1940). 
\newblock Remarques sur la th\'eorie m\'etrique des fractions
continues. 
\newblock {\sl Compositio Math.} {\bf 7} 353-371.

\bibitem{Dou} 
P.\ Doukhan (1995) Mixing. 
\newblock Properties and examples. 
\newblock {\sl Lecture Notes in Statistics \bf 85}. 
\newblock Springer-Verlag.

\bibitem{zero}
\newblock P.\ A.\ Ferrari, A.\ Galves and C.\ Landim (1994).
\newblock Exponential waiting time for a big gap in a one
dimensional zero range process. 
\newblock {\sl Ann.\ Probab.\ \bf 22} 1:284-288.

\bibitem{FGL}
\newblock P.\ A.\ Ferrari, A.\ Galves and T.\ Liggett (1995).
\newblock Exponential waiting time for filling a large interval
in the symmetric simple exclusion process.
\newblock {\sl Ann.\ Inst.\ H.\ Poincar\'e} {\bf 31} 1:155-175.

\bibitem{GS}
\newblock A.\ Galves and B.\ Schmitt (1997).
\newblock Inequalities for hitting times in mixing dynamical systems. 
\newblock {\sl Random \ Comput. \ Dyn.}\ {\bf 5}, 337-348.

\bibitem{Haydn}
\newblock N.\ Haydn (1999).
\newblock The distribution of the first return time for
rational maps.
\newblock {\sl J.\ Stat.\ Phys.}\ {\bf 94} 5-6:1027-1036.

\bibitem{H}
\newblock M.\ Hirata (1993).
\newblock Poisson law for Axiom A diffeomorphism.
\newblock {\sl Ergod. Th. Dyn. Sys.} {\bf 13}. 533-556.

\bibitem{HSV}
\newblock M.\ Hirata, B.\ Saussol and S.\ Vaienti (1999).
\newblock Statistics of return
times: a general framework and new applications. 
\newblock {\sl Comm. \ Math. \ Phys.}\ {\bf 206} 33-55.

\bibitem{Har}
\newblock T.\ E.\ Harris (1953).
\newblock First passage and recurrence distributions.
\newblock {\sl Trans.\ Amer.\ Math.\ Soc.\ \bf 73} 471-486.

\bibitem{Kac}
\newblock M.\ Kac (1947).
\newblock On the notion of recurrence in discrete stochastic processes
\newblock {\sl Bull.\ Amer.\ Math.\ Soc.\ \bf 53} 1002-1010.

\bibitem{Keilson} 
\newblock J.\ Keilson (1979) 
\newblock {\sl Markov Chain Models.
    Rarity and Exponentiality.}  
\newblock Springer-Verlag.

\bibitem{Kipnis}
\newblock C.\ Kipnis and C.\ Newman (1985).
\newblock The metastable behavior of infrequently
observed, weakly random, one-dimensional diffusion processes.
\newblock {\sl SIAM J. Appl. Math. \bf 45}:972.

\bibitem{Koro}
\newblock D.\ V.\ Korolyuk and D.\ S.\ Sil'vestrov (1984).
\newblock Entry times into asymptotically receding domains for 
ergodic Markov chains. 
\newblock {\sl Theory Prob.\ Appl.\ \bf 19} 432-442.

\bibitem{Lebo}
\newblock J.\ L.\ Lebowitz and R.\ H.\ Schonmann (1987).
\newblock On the asymptotics of
occurrence times of rare events for stochastic spin systems. 
\newblock {\sl J.\ Stat.\ Phys.\ \bf 48} 727-751.

\bibitem{Enzo1}
\newblock E.\ Olivieri and E.\ Scoppola (1995).
\newblock Markov chains with exponentially small
transition probabilities: first exit problem from a general domain. I. The
reversible case. 
\newblock {\sl J.\ Stat.\ Phys.}\ {\bf 79} 613-647.

\bibitem{Enzo2} 
\newblock E.\ Olivieri and E.\ Scoppola (1996) Markov chains
  with exponentially small transition probabilities: first exit problem from a
  general domain. II. The general case.  
\newblock {\sl J. Statist. Phys.}
  {\bf 84} , no. 5-6, 987--1041.

\bibitem{Paccaut}
\newblock F.\ Paccaut (1999).
\newblock Statistics of return times for weighted maps of the interval
\newblock {\sl Preprint Laboratorie de Topologie, Universit\'e de Bourgogne}.

\bibitem{Pitskel}
\newblock B.\ Pitskel (1991).
\newblock poisson law for Markov chains.
\newblock {\sl Ergod. Th. Dyn. Sys.} {\bf 11}, 501-513.

\bibitem{Shields}
\newblock P.\ Shields (1996) The ergodic theory of discrete sample paths.
\newblock {\sl Graduate Studies in Mathematics}\ {\bf Volume 13} A.M.S.

\bibitem{Adilson}
\newblock A.\ Simonis (1998).
\newblock Filling the hypercube in the supercritical
contact process in equilibrium.
\newblock {\sl Markov\ Process.\ Rel.\ Fields \bf 4}1:113-130.


\end{thebibliography}
\vskip30pt

Miguel Abadi and Antonio Galves
 
Instituto de Matem\'atica e Estat\'{\i}stica

Universidade de S\~ao Paulo

BP 66281

05315-970 S\~ao Paulo, Brasil

e-mail: {\tt abadi@ime.usp.br} and {\tt galves@ime.usp.br}

\end{document}