\documentclass[12pt]{article}
\usepackage{chao}
\title{Outlier Embedding Notes}
\begin{document}

\section{Better Distortion with Distributions}

There is a well-known lower bound on the distortion of embedding a metric space $(X,d)$ into $\ell_1$.
\begin{theorem}
For any metric space $(X,d)$ on $n$ points, one has
\[(X,d) \lhook\joinrel\xrightarrow{\Omega(\log n)} \ell_1. \]
\end{theorem}
For $\ell_2$ the lower bound is also $\Omega(\log n)$.\footnote{\url{https://web.stanford.edu/class/cs369m/cs369mlecture1.pdf}}

Recall that we want to find an $(O(k),(1+\e)c)$-outlier embedding into $\ell_2$ for any metric space $(X,d)$ that admits a $(k,c)$-outlier embedding into $\ell_2$. If we could do this deterministically, we would actually obtain an embedding of the outlier points into $\ell_2$ with distortion $O(k)$, contradicting the lower bound. However, perhaps we can achieve $O(k)$ by embedding into some distribution over $\ell_2$ metrics.

\begin{definition}[Expected distortion]
Let $(X,d)$ be the original metric space and let $\mathcal Y=\{ (Y_1,d_1),\ldots, (Y_h,d_h) \}$ be a set of target spaces. Let $\pi$ be a distribution over embeddings into $\mathcal Y$: for each target space $(Y_i,d_i)$ we fix an embedding $\alpha_i:X\to Y_i$ and let $p_i$ be the probability of choosing it. The metric space $(X,d)$ embeds into $\pi$ with distortion $D$ if there is an $r>0$ such that for all $x,y\in X$,
\[r\leq \frac{\E_{i\from \pi} [d_i(\alpha_i(x),\alpha_i(y))]}{d(x,y)}\leq Dr.\]
\end{definition}
The SODA23 paper also embeds $(X,d)$ into a distribution. We call this kind of embedding a \emph{stochastic embedding}.

\paragraph{Example: Random Trees}
Consider the problem of embedding a finite metric into a tree metric. There is an $\Omega(n)$ lower bound, witnessed by the unit edge length cycle $C_n$. However, if embedding into a distribution of tree metrics is allowed, we can achieve $O(\log n)$. (For $C_n$ itself, deleting a uniformly random edge already gives a dominating path metric with expected distortion at most $2$.)
\begin{theorem}[Bartal]
Let $(X,d)$ be a metric space on $n$ points and let $\mathcal{DT}$ be the set of tree metrics that dominate $d$. Then there is a distribution $\pi$ on $\mathcal{DT}$ such that $(X,d)$ embeds into $\pi$ with distortion $O(\log n)$.
\end{theorem}
Is there any other known result on expected distortion of embeddings besides Bartal's theorem?

% A kind of embedding problem closely related to outlier embeddings is Ramsey-type embedding. Let $(X,d_X)$ be the original metric space and let $(Y,d_Y)$ be the target space. Given a fixed distortion $c$, Ramsey-type embedding asks for the largest subset $Z$ of $X$ such that $(Z,d_X)$ embeds into $(Y,d_Y)$ with distortion at most $c$. This is the same as computing the smallest outlier set.

\section{Stochastic Embedding into \texorpdfstring{$\ell_2$}{l2}}

We first ignore the outlier condition and ask whether stochastic embeddings can break the $\Omega(\log n)$ lower bound.
\begin{theorem}[Bourgain]
For any metric space $(X,d)$ on $n$ points and any $p$, there is an embedding of $(X,d)$ into $\ell_p^{O(\log^2 n)}$ with distortion $O(\log n)$.
\end{theorem}
Bourgain's proof gives a randomized algorithm that finds such an embedding.\footnote{The expansion bound always holds. The contraction bound holds with probability at least $1/2$. See \url{https://home.ttic.edu/~harry/teaching/pdf/lecture3.pdf}}
For the $\ell_2$ case, the embedding $f$ satisfies the following bounds:
\begin{enumerate}
\item Expansion: $\|f(x)-f(y)\|_2\leq O(\log n)\, d(x,y)$.
\item Contraction: $\|f(x)-f(y)\|_2 \geq \frac{d(x,y)}{O(1)}$.
\end{enumerate}
The contraction bound is almost tight. Let $K$ be the dimension of the target space.
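The second line of the expansion computation below uses the fact that each coordinate of $f$ is $1$-Lipschitz. This step is not spelled out in these notes, so here is a quick justification, assuming (as recalled below) that each coordinate has the Fr\'echet form $f_i(x)=d(x,S_i)=\min_{s\in S_i} d(x,s)$ for some nonempty $S_i\subseteq X$:
\[
|f_i(x)-f_i(y)| \;=\; |d(x,S_i)-d(y,S_i)| \;\leq\; d(x,y),
\]
since if $s^\ast\in S_i$ attains $d(y,S_i)$, then $d(x,S_i)\leq d(x,s^\ast)\leq d(x,y)+d(y,S_i)$ by the triangle inequality, and the symmetric argument bounds $d(y,S_i)-d(x,S_i)$.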
For the expansion bound, we have
\begin{equation*}
\begin{aligned}
\|f(x)-f(y)\|_2 &= \left( \sum_{i=1}^{K} |f_i(x)-f_i(y)|^2\right)^{1/2}\\
&\leq \left( \sum_{i=1}^{K} d(x,y)^2\right)^{1/2}\\
&=\sqrt{K}\, d(x,y)\\
&=O(\log n)\, d(x,y).
\end{aligned}
\end{equation*}
One thing we can try is to tighten the second line. Recall that for each dimension $i$ a random subset $S_i\subset X$ is selected and the value of $f_i(x)$ is $\min_{s\in S_i} d(x,s)$. We want to show that for any fixed $x,y\in X$ and any dimension $i$, the event that $|f_i(x)-f_i(y)|^2$ is much smaller than $d(x,y)^2$ happens with high probability.

Now consider a subset $S_j$ obtained by sampling each node of $X$ independently with probability $2^{-j}$. We independently repeat this process $m=576\log n$ times and denote by $S_{ij}$ the set sampled in repetition $i\in [m]$. The following lemma comes essentially for free.
\begin{lemma}
For fixed $x,y\in X$ and fixed $j$,
\[
\Pr[\text{for at least $18\log n$ values of $i$, } |f_{ij}(x)-f_{ij}(y)|\geq \rho_j -\rho_{j-1}]\geq 1-\frac{1}{n^3},
\]
where $\rho_j$ is the smallest radius for which $|B(x,\rho_j)|\geq 2^j$ and $|B(y,\rho_j)|\geq 2^j$.
\end{lemma}

\end{document}