metric-notes/Notes_SODA23.tex

\documentclass[12pt]{article}
\usepackage{chao}
\usepackage{algorithm,algorithmicx,algpseudocode}
\renewcommand{\algorithmiccomment}[1]{\bgroup\hfill//~#1\egroup}

\begin{document}

\section{Preliminaries}

\begin{definition}
    A \textit{metric space} is a pair $(X,d_X)$ such that $X$ is a set of points and $d_X:X\times X\to \mathbb{R}_{\geq 0}$ is a function satisfying the following properties for all $x,y,z\in X$:
    \begin{enumerate}
        \item $d_X(x,y)=0$ if and only if $x=y$
        \item $d_X(x,y)=d_X(y,x)$
        \item $d_X(x,z)\leq d_X(x,y)+d_X(y,z)$
    \end{enumerate}
\end{definition}

\begin{definition}
    An \textit{embedding} $\alpha:X\to Y$ of a metric space $(X,d_X)$ into another metric space $(Y,d_Y)$ is defined as an injective mapping $\alpha:X\to Y$.
\end{definition}

\begin{definition}
    An \textit{expanding embedding} $\alpha:X\to Y$ of a metric space $(X,d_X)$ into another metric space $(Y,d_Y)$ has \textit{distortion} $c\geq 1$ if for all $x,y\in X:$
    \begin{equation*}
        d_X(x,y)\leq d_Y(\alpha(x),\alpha(y))\leq c\cdot d_X(x,y).
    \end{equation*}
\end{definition}

\begin{definition}
    An embedding $\alpha: X\to Y$ of a metric space $(X,d_X)$ into another metric space $(Y,d_Y)$ is a \textit{$(k,c)$-outlier embedding} if there exists $K\subseteq X$ such that $|K|\leq k$ and $\alpha|_{X\setminus K}$ (the restriction of $\alpha$ to the domain $X\setminus K$) is an embedding of $(X\setminus K,d_X|_{X\setminus K})$ with distortion at most $c$.
\end{definition}

\begin{definition}
    Let $(X,d_X)$ and $(Y,d_Y)$ be two metric spaces and $\alpha_S: S\subseteq X\to Y$ be an embedding with Lipschitz constant at most $L$. Then $\alpha :X\to Y$ is a \textit{Lipschitz extension} of $\alpha_S$ with extension factor $g(|S|,|X|)$ if for all $x\in S, \alpha(x)=\alpha_S(x)$ and for all $x,y\in X$,
    \begin{equation*}
        d_Y(\alpha(x),\alpha(y))\leq g(|S|,|X|)\cdot L\cdot d_X(x,y).
    \end{equation*}
\end{definition}

\begin{definition}[\textsc{Composition of nested embeddings}]
    Let $(X,d_X)$ and $(Y,d_Y)$ be two metric spaces and $g:[0,\infty)^2 \times \mathbb{N} \to [1,\infty)$. A \textit{weak $g$-nested composition} is an algorithm that, given a set $S\subseteq X$ with $k:=|X\setminus S|$, and two expanding embeddings, $\alpha_S:S\to Y$ with distortion $c_{S}$ and $\alpha_X:X\to Y$ with distortion $c_X\geq c_S$, returns an embedding $\alpha:X\to Y$ such that,
    \begin{align*}
        &\text{for all } x,y\in S, \quad &d_X(x,y)\leq &d_Y(\alpha(x),\alpha(y))& \leq c_S&\cdot d_X(x,y),\\
        &\text{and, for all } x,y\in X, \quad & &d_Y(\alpha(x),\alpha(y))& \leq g(c_S,c_X,k)&\cdot d_X(x,y).
    \end{align*}
    We say that it is a \textit{nested composition} if the embedding $\alpha$ is additionally an expanding embedding. That is,
    \begin{align*}
        &\text{for all } x,y\in S, \quad &d_X(x,y)&\leq d_Y(\alpha(x),\alpha(y))& \leq c_S&\cdot d_X(x,y),\\
        &\text{and, for all } x,y\in X, \quad &d_X(x,y)&\leq d_Y(\alpha(x),\alpha(y))& \leq g(c_S,c_X,k)&\cdot d_X(x,y).
    \end{align*}
\end{definition}

\section{Main results and conjectures}

\begin{theorem}\label{outEmb}
    Let $(X,d_X)$ be a metric space that admits a $(k,c)$-outlier embedding. Then there exists a polynomial time algorithm $\mathcal{A}$ that, for any $\gamma >1$, finds a subset $K\subseteq X$ and an embedding $\alpha: X\setminus K\to \ell_2$ such that $\alpha$ has distortion at most $\gamma c$, and
    \begin{equation*}
        |K|\leq 2\frac{(125\cdot H_k)^2+\gamma^2}{\gamma^2-1}k
    \end{equation*}
    Choosing $\gamma=1+\varepsilon$ for $\varepsilon\in (0,1]$, in particular, provides an $\big( O(\frac{\log^2 k}{\varepsilon}k), (1+\varepsilon)c \big)$-outlier embedding from $X$ into $\ell_2$.
\end{theorem}

\begin{lemma}\label{weakNested}
    Let $(X,d_X)$ be any finite metric. Let $\alpha_S:S\to \ell_p$ be any Lipschitz embedding of $S\subseteq X$ into $\ell_p$ with $k:=|X\setminus S|$. Then there exists a weak $125H_kc_S$-nested composition from $X$ into $\ell_p$, where $H_k$ is the $k$th Harmonic number.
\end{lemma}

\begin{lemma}\label{nested}
    Let $(X,d_X)$ be any finite metric. Then there exists a $382H_kc_X$-nested composition from $X$ into $\ell_1$.
\end{lemma}

\begin{conjecture}\label{con1}
    Let $(X,d_X)$ be a metric space that admits a $(k,c)$-outlier embedding. Then there exists a polynomial time algorithm $\mathcal{A}$ that, for any $\varepsilon>0$, finds an $\big( O(\frac{k}{\varepsilon}), (1+\varepsilon)c \big)$-outlier embedding from $X$ into $\ell_2$.
\end{conjecture}

\begin{conjecture}\label{con2}
    Let $(X,d_X)$ be a metric space that admits a $(k,c)$-outlier embedding. Then there exists a polynomial time algorithm $\mathcal{A}$ that, for any $\varepsilon>0$, finds an $\big( O(\frac{f(k)}{\varepsilon}k), (1+\varepsilon)c \big)$-outlier embedding from $X$ into $\ell_p$.
\end{conjecture}

Due to the limitation of SDP, Conjecture~\ref{con2} requires an entirely different framework, so we maintain a skeptical perspective regarding this conjecture.

\begin{conjecture}\label{con3}
    Let $(X,d_X)$ be any finite metric. Then there exists an $f(k)c_X$-nested composition from $X$ into $\ell_p$.
\end{conjecture}

Intuitively, Conjecture \ref{con3} can be proved based on the framework of Lemma \ref{nested}. The proof should mainly involve changes in parameters. However, once there is a problem that cannot be covered by that framework, the conjecture should be unverifiable.

\section{For Lemma \ref{weakNested}}

\begin{lemma}\label{lipbounds}
    In Algorithm \ref{Alg:Lip}, we have the following bounds on the expansion for each pair $x,y\in X$:
    \begin{enumerate}
        \item If $x,y\in S$, then $d_Y(\alpha(x),\alpha(y))\leq c_S \cdot d_X(x,y)$.
        \item If $x\in S$ and $y\in X\setminus S$, then $d_Y(\alpha(x),\alpha(y))\leq 10c_S \cdot d_X(x,y)$.
        \item If $x,y\in X\setminus S$ and $d_X(x,\gamma(x))\leq 2\cdot d_X(x,y)$, then $d_Y(\alpha(x),\alpha(y))\leq 50c_S \cdot d_X(x,y)$.
        \item If $x,y\in X\setminus S$ and $d_X(x,\gamma(x)),d_X(y,\gamma(y))> 2\cdot d_X(x,y)$, then $E_{\alpha}[d_Y(\alpha(x),\alpha(y))]\leq 125c_S\cdot H_k \cdot d_X(x,y)$.
    \end{enumerate}
\end{lemma}

\begin{algorithm}[H]
    \renewcommand{\algorithmicrequire}{\textbf{Input:}}
    \renewcommand{\algorithmicensure}{\textbf{Output:}}
    \caption{Lipschitz extension}
    \label{Alg:Lip}
    \begin{algorithmic}[1]
        \Require Metric spaces $(X,d_X)$ and $(Y, d_Y)$, $|X|=n$, subset $S \subseteq X$ and embedding $\alpha_S : S \to Y$ with distortion $c_S$
        \Ensure A randomized embedding $\alpha : X \to Y$ such that for all $x, y \in X$, $E[d_Y(\alpha(x), \alpha(y))] \leq 125H_k c_S \cdot d_X(x, y)$ for $k = |X \setminus S|$, and for all $x \in S$, $\alpha(x) = \alpha_S(x)$
        \State $K \leftarrow X \setminus S$.
        \State Define $\gamma : K \to S$ such that $\gamma(u) \in \arg\min_{v \in S} d_X(u,v)$.
        \State Select $b$ uniformly at random from the range $[2, 4]$.
        \State Select a uniformly random permutation $\pi : K \to [k]$ of the vertices in $K$
        \For{$i = 1$ to $k$}
            \State $u_i \leftarrow \pi^{-1}(i)$
            \State $K_i \leftarrow \{v \in K \mid d_X(v, u_i) \leq b \cdot d_X(v, \gamma(v))\}$
            \State $K \leftarrow K \setminus K_i$
        \EndFor
        \State Define an embedding $\alpha : X \to Y$ such that \begin{equation*} \alpha(v) =
            \begin{cases}
                \alpha_S(v) & \text{if } v \in S \\
                \alpha_S(\gamma(u_i)) & \text{if } v \in K_i
            \end{cases}
        \end{equation*}
        \State Output $\alpha$
    \end{algorithmic}
\end{algorithm}

\begin{proof}[of Lemma \ref{lipbounds}]
When $x,y\in S$, the corresponding inequality holds by the definition of $\alpha$.

When $x\in S$ and $y\in X\setminus S$, let $y\in K_i$. By the definition of $\alpha$ and the first case of Lemma \ref{lipbounds} (applied to the pair $x,\gamma(u_i)\in S$), we have $d_Y(\alpha(x),\alpha(y))=d_Y(\alpha(x),\alpha(\gamma(u_i)))\leq c_S\cdot d_X(x,\gamma(u_i))$. By the triangle inequality, the definition of $\gamma$ (together with $x\in S$) and the definition of $K_i$, we have
\begin{align*}
    d_X(x,\gamma(u_i))&\leq d_X(x,u_i)+d_X(u_i,\gamma(u_i))\\
    &\leq 2\cdot d_X(x,u_i)\\
    &\leq 2\cdot(d_X(x,y)+d_X(y,u_i))\\
    &\leq 2\cdot(d_X(x,y)+b\cdot d_X(y,\gamma(y)))\\
    &\leq (2+2b)\cdot d_X(x,y)\\
    &\leq 10\cdot d_X(x,y)
\end{align*}

When $x,y\in X\setminus S$ and $d_X(x,\gamma(x))\leq 2\cdot d_X(x,y)$, by the second case of Lemma \ref{lipbounds}, we have
\begin{align*}
    d_Y(\alpha(x),\alpha(y))&\leq d_Y(\alpha(x),\alpha(\gamma(x)))+d_Y(\alpha(\gamma(x)),\alpha(y))\\
    &\leq (2+2b)c_S\cdot (d_X(x,\gamma(x))+d_X(\gamma(x),y))\\
    &\leq (2+2b)c_S\cdot (2d_X(x,\gamma(x))+d_X(x,y))\\
    &\leq (10+10b)c_S\cdot d_X(x,y)\\
    &\leq 50c_S\cdot d_X(x,y)
\end{align*}

When $x,y\in X\setminus S$ and $d_X(x,\gamma(x)),d_X(y,\gamma(y))> 2\cdot d_X(x,y)$, let $x\in K_i$ and $y\in K_j$.
If $i= j$, then we have $d_Y(\alpha(x),\alpha(y))=0$. If $i\ne j$, we say that $x$ and $y$ are separated and write $x\sim y$. By the second case of Lemma \ref{lipbounds}, we have
\begin{align*}
    d_Y(\alpha(x),\alpha(y))&\leq d_Y(\alpha(x),\alpha(\gamma(x)))+d_Y(\alpha(\gamma(x)),\alpha(y))\\
    &\leq (2+2b)c_S\cdot (d_X(x,\gamma(x))+d_X(\gamma(x),y))\\
    &\leq (2+2b)c_S\cdot (2d_X(x,\gamma(x))+d_X(x,y))\\
    &\leq (5+5b)c_S\cdot d_X(x,\gamma(x))\\
    &\leq 25c_S\cdot d_X(x,\gamma(x))
\end{align*}
\begin{itemize}
    \item If $x\sim y$, suppose that $i<j$ and define that
    \begin{equation*}
        \beta_{\pi^{-1}(s)}=\min\Big\{\frac{d_X(x,u_s)}{d_X(x,\gamma(x))},\frac{d_X(y,u_s)}{d_X(y,\gamma(y))}\Big\}, ~~\forall s\in [1,k]
    \end{equation*}
    Since $x\in K_i$ and $y\in K_j$, then
    \begin{equation*}
        \frac{d_X(x,u_i)}{d_X(x,\gamma(x))}\leq b <\frac{d_X(y,u_i)}{d_X(y,\gamma(y))}
    \end{equation*}
    and
    \begin{equation*}
        \beta_{\pi^{-1}(s)}>b\geq \beta_{\pi^{-1}(i)}, ~~\forall s\in [1,i)
    \end{equation*}
\end{itemize}
Consider a fixed pair $(x,y)$ satisfying that $d_X(x,\gamma(x)),d_X(y,\gamma(y))> 2\cdot d_X(x,y)$. For some center $u$, let $E_u$ be the event that $\beta_{\pi^{-1}(s)}>\beta_{u}$ for all $1\leq s<\pi(u)$. Define an order $\text{index}:K\to [k]$ such that $\text{index}(u)\leq \text{index}(v)$ if and only if $\beta_u\leq \beta_v$. Then $\Pr_{\pi}[E_u]=1/\text{index}(u)$ (which is proved in Appendix \ref{ComE}). For any fixed center $u$ such that $d_X(x,u)\leq 4\cdot d_X(x,\gamma(x))$ and some parameter $b\in [2,4]$, let $E_b$ be the event that
\begin{equation*}
    \frac{d_X(x,u)}{d_X(x,\gamma(x))}\leq b <\frac{d_X(y,u)}{d_X(y,\gamma(y))}
\end{equation*}
Then \begin{align*}
    \Pr_b [E_b]&\leq \frac{1}{2}\cdot \Big(\frac{d_X(y,u)}{d_X(y,\gamma(y))}-\frac{d_X(x,u)}{d_X(x,\gamma(x))}\Big)\\
    &\leq \frac{1}{2}\cdot \Big(\frac{d_X(x,u)+d_X(x,y)}{d_X(x,\gamma(y))-d_X(x,y)}-\frac{d_X(x,u)}{d_X(x,\gamma(x))}\Big)\\
    &\leq \frac{1}{2}\cdot \Big(\frac{d_X(x,u)+d_X(x,y)}{d_X(x,\gamma(x))-d_X(x,y)}-\frac{d_X(x,u)}{d_X(x,\gamma(x))}\Big)\\
    &= \frac{1}{2}\cdot \frac{d_X(x,y)\cdot \big(d_X(x,\gamma(x))+d_X(x,u)\big)}{\big(d_X(x,\gamma(x))-d_X(x,y)\big)\cdot d_X(x,\gamma(x))}\\
    &\leq 5\cdot \frac{d_X(x,y)}{d_X(x,\gamma(x))}
\end{align*}
where the third inequality is by the definition of $\gamma$, and the final inequality is by the facts that $d_X(x,u)\leq 4\cdot d_X(x,\gamma(x))$ and $d_X(x,\gamma(x))> 2\cdot d_X(x,y)$.

Thus,
\begin{align*}
    \Pr_{\pi,b}[x\sim y]&=\sum_{u\in K}\Pr_{b}[x\sim y|E_u]\cdot \Pr_{\pi}[E_u]\\
    &\leq \sum_{u\in K} \Pr_b [E_b]\cdot \Pr_{\pi}[E_u]\\
    &\leq \sum_{u\in K}5\cdot \frac{d_X(x,y)}{d_X(x_u,\gamma(x_u))}\cdot \frac{1}{\text{index}(u)}
\end{align*}
where $x_u$ is the vertex in $\{x,y\}$ satisfying that $x_u\in K_{\pi(u)}$, and the first inequality is by the fact that $E_b$ is a necessary condition of $x\sim y$ when $E_u$ holds.

Finally,
\begin{align*}
    E_{\pi,b}[d_Y(\alpha(x),\alpha(y))]&\leq \sum_{u\in K}\Pr_b [E_b]\cdot \Pr_{\pi}[E_u]\cdot 25c_S\cdot d_X(x_u,\gamma(x_u))\\
    &\leq 125c_S\cdot d_X(x,y)\cdot \sum_{u\in K} \frac{1}{\text{index}(u)}\\
    &=125c_S\cdot H_k\cdot d_X(x,y).
\end{align*}

\end{proof}

\section{Main trouble for conjecture \ref{con1}}\label{Trouble}

When we cluster a set of outliers $K_i$ and embed them into a fixed non-outlier $\gamma(u_i)$ (i.e., $\alpha(v)=\alpha_S(\gamma(u_i)), \forall v\in K_i$), the troublesome case is that $x\in K_i,y\notin K_i$ but $d_X(x,y)\approx 0$.

The original paper addresses this troublesome case by incorporating the harmonic number $H_k$ into their bounds. Specifically, they handle the scenario by randomly selecting the clustering center and demonstrating that the troublesome case occurs with low probability. Since this low probability is proportional to $d_X(x,y)$, they derive the bound:
\begin{equation*}
    \mathbb{E}_{\alpha}[d_Y(\alpha(x),\alpha(y))]\leq 125 c_S H_k\cdot d_X(x,y).
\end{equation*}
In contrast, for the non-troublesome case, they prove a much better bound:
\begin{equation*}
    d_Y(\alpha(x),\alpha(y))\leq 50 c_S\cdot d_X(x,y).
\end{equation*}

We have tried to handle the troublesome case via non-probabilistic methods, but these attempts failed. No fixed clustering rule that we tried (including merging two nearby clusters) fully covers the troublesome case: each such rule still allows some pair of arbitrarily close outliers to be assigned to distinct clusters.

\section{Other variants of the problem}

\begin{conjecture}\label{con4}
    Let $(X,\|\cdot\|_p)$ be a metric space and $p\ne 2$. Let $\alpha: S \to \ell_2$ be any Lipschitz embedding of $S\subseteq X$ into $\ell_2$ with $k=|X\setminus S|$. Then there exists a weak $m\cdot c_S$-nested composition from $X$ into $\ell_2$, where $m$ is a constant.
\end{conjecture}

When the input metric space is in $\ell_p$, the troublesome case in Section \ref{Trouble} still exists and is difficult to handle. If we cannot eliminate the harmonic number $H_k$, the restrictions on the input metric space make less sense.

\begin{definition}
    Let $X\subset \mathbb{R}^{m}$ be a set with $|X|=n$. An embedding $\alpha: X\to \mathbb{R}^l$ is a \textit{$(k,c,l)$-outlier embedding} if there exists $K\subseteq X$ such that $|K|\leq k$ and $\alpha|_{X\setminus K}$ (the restriction of $\alpha$ to the domain $X\setminus K$) is an embedding of $(X\setminus K,d|_{X\setminus K})$ with distortion at most $c$.
\end{definition}

\clearpage
\appendix

\section{(Unfinished) New Lemma \ref{weakNested}}

\begin{algorithm}[H]
    \renewcommand{\algorithmicrequire}{\textbf{Input:}}
    \renewcommand{\algorithmicensure}{\textbf{Output:}}
    \caption{Lipschitz extension}
    \label{Alg:NewLip}
    \begin{algorithmic}[1]
        \Require Metric spaces $(X,d_X)$ and $(Y, d_Y)$, $|X|=n$, subset $S \subseteq X$ and embedding $\alpha_S : S \to Y$ with distortion $c_S$, parameters $b>0$ and $m>1$
        \Ensure A randomized embedding $\alpha : X \to Y$ such that for all $x, y \in X$, $E[d_Y(\alpha(x), \alpha(y))] \leq 125H_k c_S \cdot d_X(x, y)$ for $k = |X \setminus S|$, and for all $x \in S$, $\alpha(x) = \alpha_S(x)$
        \State $K \leftarrow X \setminus S$.
        \State Define $\gamma : K \to S$ such that $\gamma(u) \in \arg\min_{v \in S} d_X(u,v)$.
        \State Let $K=\{u_1,u_2,\dots,u_k\}$ such that $d_X(u_i,\gamma(u_i))\leq d_X(u_j,\gamma(u_j))$ for any $1\leq i<j\leq k$.
        \State $U\gets K$
        \For{$i = 1$ to $k$}
            \State $U_i \leftarrow \{v \in U \mid d_X(u_i, \gamma(u_i)) \leq d_X(v, \gamma(u_i)) \leq b \cdot d_X(v, \gamma(v))\}$
            \State $U \leftarrow U \setminus U_i$
        \EndFor
        \For{$i = 1$ to $k-1$}
            \For{$j = i+1$ to $k$}
                \If{$U_i,U_j\ne \emptyset$ and $d_X(u_j,\gamma(u_i))\geq m\cdot d_X(u_i,u_j)$}
                    \State $U_i \gets U_i\cup U_j$
                    \State $U_j \gets \emptyset$
                \EndIf
            \EndFor
        \EndFor
        % \While{there are two non-empty sets $U_i$ and $U_j$ such that $d_X(u_s,\gamma(u_s))>2\cdot d_X(u_i,u_j), s\in\{i,j\}$ and $1\leq i<j\leq k$}
        %     \State $U_i \gets U_i\cup U_j$
        %     \State $U_j \gets \emptyset$
        % \EndWhile
        \State Define an embedding $\alpha : X \to Y$ such that \begin{equation*} \alpha(v) =
            \begin{cases}
                \alpha_S(v) & \text{if } v \in S \\
                \alpha_S(\gamma(u_i)) & \text{if } v \in U_i
            \end{cases}
        \end{equation*}
        \State Output $\alpha$
    \end{algorithmic}
\end{algorithm}
We call lines 9--16 (the nested \textbf{for} loops over $i$ and $j$) the \emph{merging step}. For the sake of analysis, we use $\tilde{U}_i$ to denote $U_i$ before the merging step.

\begin{lemma}\label{new0}
    In Algorithm \ref{Alg:NewLip}, if there exists $i<j$ such that $\tilde{U}_j\subseteq U_i$, then $d_X(u_j,\gamma(u_i))\leq \frac{m}{m-1}\cdot d_X(u_j,\gamma(u_j))$ and $d_X(u_i,u_j)\leq \frac{1}{m-1} \cdot d_X(u_i,\gamma(u_i))$.
\end{lemma}

\begin{proof}
    Since $\tilde{U}_j\subseteq U_i$, then $d_X(u_j,\gamma(u_i))\geq m\cdot d_X(u_i,u_j)$. Since $i<j$, then $d_X(u_i,\gamma(u_i))\leq d_X(u_j,\gamma(u_j))$. So
    \begin{align*}
        d_X(u_j,\gamma(u_i))&= \frac{m}{m-1}\cdot d_X(u_j,\gamma(u_i))- \frac{1}{m-1}\cdot d_X(u_j,\gamma(u_i))\\
        &\leq \frac{m}{m-1}\cdot d_X(u_j,\gamma(u_i))- \frac{m}{m-1}\cdot d_X(u_i,u_j)\\
        &\leq \frac{m}{m-1}\cdot d_X(u_i,\gamma(u_i))\\
        &\leq \frac{m}{m-1}\cdot d_X(u_j,\gamma(u_j)).
    \end{align*}
    and
    \begin{align*}
        d_X(u_i,u_j)&= \frac{m}{m-1}\cdot d_X(u_i,u_j)- \frac{1}{m-1}\cdot d_X(u_i,u_j)\\
        &\leq \frac{1}{m-1} \cdot d_X(u_j,\gamma(u_i))- \frac{1}{m-1}\cdot d_X(u_i,u_j)\\
        &\leq \frac{1}{m-1} \cdot d_X(u_i,\gamma(u_i)).
    \end{align*}
\end{proof}


\begin{lemma}\label{new1}
    In Algorithm \ref{Alg:NewLip}, if there exists $i<j$ such that $\tilde{U}_j\nsubseteq U_i$, then $d_X(u_i,\gamma(u_i))< m\cdot d_X(u_i,u_j)$.
\end{lemma}

\begin{proof}
    Since $\tilde{U}_j\nsubseteq U_i$, then $d_X(u_j,\gamma(u_i))< m\cdot d_X(u_i,u_j)$. Since $i<j$, then $d_X(u_i,\gamma(u_i))\leq d_X(u_j,\gamma(u_j))$. So
    \begin{align*}
        d_X(u_i,\gamma(u_i))&\leq d_X(u_j,\gamma(u_j))\\
        &\leq d_X(u_j,\gamma(u_i))\\
        &< m\cdot d_X(u_i,u_j).
    \end{align*}
\end{proof}


\begin{lemma}\label{new2}
    In Algorithm \ref{Alg:NewLip}, if $x,y\in S$, then $d_Y(\alpha(x),\alpha(y))\leq c_S \cdot d_X(x,y)$.
\end{lemma}

\begin{lemma}\label{new3}
    In Algorithm \ref{Alg:NewLip}, if $x\in S$ and $y\in X\setminus S$, then $d_Y(\alpha(x),\alpha(y))\leq (4b+1)c_S \cdot d_X(x,y)$.
\end{lemma}

\begin{proof}
    Let $y\in U_i$. By Lemma \ref{new2} and the definition of $\alpha$, $d_Y(\alpha(x),\alpha(y))=d_Y(\alpha(x),\alpha(\gamma(u_i)))\leq c_S\cdot d_X(x,\gamma(u_i))$. Since $y\in U_i$, there are two following cases:
    \begin{enumerate}
        \item $y\in \tilde{U}_i$.
        \item $y\in \tilde{U}_j\subseteq U_i$ and $i<j$.
    \end{enumerate}

    For the first case, by the definitions of $\tilde{U}_i$, we have $d_X(y,\gamma(u_i))\leq b\cdot d_X(y,\gamma(y))$. Then by the definitions of $\gamma$ and $x\in S$, we have
    \begin{align*}
        d_X(x,\gamma(u_i))&\leq d_X(x,y)+d_X(y,\gamma(u_i))\\
        &\leq d_X(x,y)+b\cdot d_X(y,\gamma(y))\\
        &\leq (1+b)\cdot d_X(x,y)
    \end{align*}

    For the second case, by the definitions of $\tilde{U}_j$, we have $d_X(u_j, \gamma(u_j))\leq d_X(y,\gamma(u_j))\leq b\cdot d_X(y,\gamma(y))$. By Lemma \ref{new0} (assuming $m\geq 2$, so that $\frac{m}{m-1}\leq 2$), we have $d_X(u_j,\gamma(u_i))\leq 2\cdot d_X(u_j,\gamma(u_j))$. Then by the definitions of $\gamma$ and $x\in S$, we have
    \begin{align*}
        d_X(x,\gamma(u_i))&\leq d_X(x,y)+d_X(y,\gamma(u_j))+d_X(\gamma(u_j), u_j)+d_X(u_j,\gamma(u_i))\\
        &\leq d_X(x,y)+b\cdot d_X(y,\gamma(y))+3\cdot d_X(u_j,\gamma(u_j))\\
        &\leq d_X(x,y)+b\cdot d_X(y,\gamma(y))+3b\cdot d_X(y,\gamma(y))\\
        &\leq (4b+1)\cdot d_X(x,y)
    \end{align*}
\end{proof}

\begin{lemma}\label{new4}
    In Algorithm \ref{Alg:NewLip}, if $x,y\in X\setminus S$, then $d_Y(\alpha(x),\alpha(y))\leq ?c_S \cdot d_X(x,y)$.
\end{lemma}

\begin{proof}
    Let $y\in U_i$ and $x\in U_j$. By Lemma \ref{new3} and the definition of $\alpha$,
    \begin{align*}
        d_Y(\alpha(x),\alpha(y))&\leq d_Y(\alpha(\gamma(u_i)), \alpha(x))+d_Y(\alpha(\gamma(u_i)),\alpha(y))\\
        &\leq (4b+1)c_S\cdot (d_X(\gamma(u_i), x)+d_X(\gamma(u_i),y))\\
        &\leq (4b+1)c_S\cdot (2d_X(\gamma(u_i),x)+d_X(x,y))
    \end{align*}

    % When $i=j$, the proof is trivial. Suppose that $i<j$. There are four following cases:
    % \begin{enumerate}
    %     \item $y\in \tilde{U}_i$.
    %     \item $y\in \tilde{U}_s\subseteq U_i$ and $s<i$.
    % \end{enumerate}

    % For the first case, by the definitions of $\tilde{U}_j$, we have $d_X(u_j, \gamma(u_j))\leq d_X(y,\gamma(u_j))\leq b\cdot d_X(y,\gamma(y))$. By Lemma \ref{new1}, we have $d_X(u_i,\gamma(u_i))< 2\cdot d_X(u_i,u_j)$
    % \begin{align*}
    %     d_X(x,\gamma(u_i))&\leq d_X(x,y)+d_X(y,\gamma(u_j))+d_X(\gamma(u_j), u_i)+d_X(u_i,\gamma(u_i))\\
    %     &< d_X(x,y)+d_X(y,\gamma(u_j))+d_X(\gamma(u_j), u_i)+2\cdot d_X(u_i,u_j)\\
    %     &\leq d_X(x,y)+d_X(y,\gamma(u_j))+3d_X(\gamma(u_j), u_i)+2\cdot d_X(\gamma(u_j),u_j)\\
    %     &\leq d_X(x,y)+d_X(y,\gamma(u_j))+3d_X(\gamma(u_j), u_i)+2b\cdot d_X(\gamma(y),y)
    % \end{align*}

    % For the second case, let $y\in \tilde{U}_t$. Since $y\in U_j\ne U_i$, we know that $d_X(u_t,\gamma (u_i))>2\cdot d_X(u_i,u_t)$ and $d_X(u_j,\gamma (u_i))>2\cdot d_X(u_i,u_j)$.
\end{proof}

\section{Combinatorial Equality}\label{ComE}

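The following derivation establishes $\Pr_{\pi}[E_u]=1/\text{index}(u)$. In this appendix only, $n$ denotes the total number of centers being permuted and we suppose that $\text{index}(u)=k+1$. Each line below is equivalent to the next, and the last line is the hockey-stick identity.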

\begin{align*}
    \frac{(n-1)!+(n-k-1)(n-2)!+\dots+(n-k-1)!k!}{n!}&=\frac{1}{k+1}\\
    \frac{(n-k-1)!}{n!}\sum_{i=1}^{n-k} \frac{(n-i)!}{(n-k-i)!}&=\frac{1}{k+1}\\
    \sum_{i=1}^{n-k} \frac{(n-i)!}{(n-k-i)!}&=\frac{n!}{(k+1)(n-k-1)!}\\
    \sum_{i=1}^{n-k} \frac{(n-i)!}{(n-k-i)!k!}&=\frac{n!}{(k+1)!(n-k-1)!}\\
    \sum_{i=1}^{n-k} C_{n-i}^{k}&=C_{n}^{k+1}
\end{align*}

\end{document}