\documentclass[12pt]{article}
\usepackage{chao}
\title{Outlier Embedding Notes}
\newcommand{\scut}{\textsc{Sparsest Cut}}
\newcommand{\iprod}[2]{\langle #1, #2\rangle}
\DeclareMathOperator{\las}{\textsc{Las}}
\begin{document}
In these notes we try to improve the SDP and the rounding part.
\section{Lasserre Hierarchy}
In \cite{chawla_composition_2023} the following SDP is considered.
\begin{equation}\label{outlierSDP}
\begin{aligned}
\min_{v,\delta}\quad& & \sum_x \delta_x& & &\\
\text{s.t.}\quad& & (1-\delta_x - \delta_y) d^2(x,y)\leq \|v_x-v_y\|^2 &\leq \bigl(c^2+(\delta_x+\delta_y)f(k)\bigr) d^2(x,y) & &\forall x,y\in X\\
& & \delta_x\in [0,1],\ v_x&\in \R^p & &\forall x\in X
\end{aligned}
\end{equation}
In the intended integral solution $\delta_x\in \set{0,1}$ indicates whether $x\in X$ is an outlier; the SDP relaxes this to $\delta_x\in[0,1]$. Is it possible to apply the Lasserre hierarchy to the variables $\delta_x$ only?
The idea is to interpret $\delta_x\in [0,1]$ as the probability that $x$ is an outlier and to try to recover the joint distribution of $(\delta_x)_{x\in X}$. We use the definition from \cite{guruswami_approximating_2013}.
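As a sanity check (my reading of \autoref{outlierSDP}), consider an integral labeling. For $\delta_x=\delta_y=0$ the pair constraint reads
\begin{equation*}
d^2(x,y)\leq \|v_x-v_y\|^2\leq c^2 d^2(x,y),
\end{equation*}
so non-outlier pairs embed with distortion $c$; for $\delta_x+\delta_y\geq 1$ the lower bound becomes vacuous and the upper bound relaxes to $(c^2+(\delta_x+\delta_y)f(k))d^2(x,y)$, so pairs touching an outlier may be stretched by a factor up to $\sqrt{c^2+2f(k)}$.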
\begin{definition}[Lasserre hierarchy]
Given a variable set $V$ and a positive integer $t$, a set of vectors $x$ is said to satisfy the $t$-round Lasserre hierarchy (denoted by $x\in\las_t(V)$) if $x$ satisfies the following conditions:
\begin{enumerate}
\item For all $S\in \binom{V}{\leq t+1}$, there exists a mapping $x_S(\cdot):\set{0,1}^S \to \R^\gamma$, which assigns a vector to every possible 0-1 labeling of $S$. Note that this represents all possible events involving at most $t+1$ variables. For a labeling $f\in \set{0,1}^S$ and an element $s\in S$ we write $f(s)$ for the label of $s$. For $S$ of small cardinality we write $x_{\set{u,v}}(0,1)$ for $x_{\set{u,v}}(f)$ where $(f(u),f(v))=(0,1)$.
\item $\|x_{\emptyset}\|^2=1$.
\item $\langle x_S(f),x_T(g)\rangle=0$ if $f(S)$ and $g(T)$ are inconsistent, \ie{} there exists $u\in S\cap T$ such that $f(u)\neq g(u)$.
\item $\langle x_S(f),x_T(g)\rangle=\langle x_{S'}(f'),x_{T'}(g')\rangle$ if $S\cup T=S'\cup T'$, all labels are consistent, and $f\land g=f'\land g'$.
\item $\|x_u(0)\|^2+\|x_u(1)\|^2=\|x_{\emptyset}\|^2=1$ for all $u\in V$.
\end{enumerate}
\end{definition}
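For instance, as a sanity check: for any assignment $z\in\set{0,1}^V$ and any unit vector $x_\emptyset$, setting
\begin{equation*}
x_S(f)=\begin{cases}x_\emptyset & f=z|_S\\ 0 & \text{otherwise}\end{cases}
\end{equation*}
satisfies all five conditions, so every integral assignment yields a point of $\las_t(V)$ for every $t$.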
\begin{remark}
For conditions 3 and 4 one should read the inner product $\langle x_S(f),x_T(g)\rangle$ as the probability $\Pr[\bigwedge_{s\in S}v_s=f(s)\land\bigwedge_{t\in T}v_t=g(t)]$.
\end{remark}
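For example, for $t\geq 1$ and any pair $u,v\in V$ this interpretation is consistent: by the lemma below,
\begin{equation*}
\sum_{(a,b)\in\set{0,1}^2}\|x_{\set{u,v}}(a,b)\|^2=\|x_\emptyset\|^2=1,\qquad \sum_{b\in\set{0,1}}\|x_{\set{u,v}}(a,b)\|^2=\|x_u(a)\|^2,
\end{equation*}
so the four squared norms form a genuine distribution over the labelings of $\set{u,v}$ with the correct marginals; this is exactly the kind of local joint distribution on the indicators that we hope to recover.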
\begin{lemma}[pseudo probability]
Let $x\in \las_t(V)$ for some $t\geq 0$. Then the following hold:
\begin{enumerate}
\item $\|x_S(f)\|^2 \in [0,1]$ for all $|S|\leq t+1$.
\item $\|x_S(f)\|^2 \leq \|x_T(g)\|^2$ if $T\subset S$ and $f(u)=g(u)$ for all $u\in T$.
\item $\|x_S(f)\|^2 = \sum_{h\in \set{0,1}^{T-S}} \|x_T(f\land h)\|^2$ if $S\subset T$.
\item If $S\in \binom{V}{\leq t}$, $f\in \set{0,1}^S$ and $u\notin S$, then $x_{S+u}(f\land u=1)+x_{S+u}(f\land u=0)=x_{S}(f)$.
\end{enumerate}
\end{lemma}
\begin{proof}
Let $N_t=\sum_{r=0}^{t+1}\binom{|V|}{r}2^r$ be the number of vectors in $x$.
Consider the moment matrix $M_t\in \R^{N_t\times N_t}$, whose entry indexed by $(S,f)$ and $(T,g)$ is $\langle x_S(f),x_T(g)\rangle$. The moment matrix is positive semidefinite since the vectors in $x$ form a Gram decomposition of $M_t$.
\begin{enumerate}
\item Consider the following submatrix of $M_t$.
\[\begin{bmatrix}
\iprod{x_\emptyset}{x_\emptyset} & \iprod{x_\emptyset}{x_S(f)}\\
\iprod{x_S(f)}{x_\emptyset} & \iprod{x_S(f)}{x_S(f)}
\end{bmatrix}\succeq 0\]
Computing the determinant gives us $\|x_S(f)\|^2(1-\|x_S(f)\|^2)\geq 0$, where we used $\iprod{x_\emptyset}{x_S(f)}=\|x_S(f)\|^2$ (condition 4 with $S'=T'=S$). Together with $\|x_S(f)\|^2\geq 0$ this proves the claim.
\item Again consider a certain submatrix of $M_t$. \[\begin{bmatrix}
\iprod{x_T(g)}{x_T(g)} & \iprod{x_T(g)}{x_S(f)}\\
\iprod{x_S(f)}{x_T(g)} & \iprod{x_S(f)}{x_S(f)}
\end{bmatrix}\succeq 0\]
The determinant is $\|x_S(f)\|^2(\|x_T(g)\|^2-\|x_S(f)\|^2)\geq 0$, where $\iprod{x_S(f)}{x_T(g)}=\|x_S(f)\|^2$ by condition 4 since $T\subset S$ and the labels are consistent.
\item We only need to show $\|x_S(f)\|^2=\|x_{S+u}(f\land u=0)\|^2 +\|x_{S+u}(f\land u=1)\|^2$ and the rest follows by induction. Note that $x_u(0)+x_u(1)=x_\emptyset$: the two vectors are orthogonal by condition 3, $\iprod{x_\emptyset}{x_u(b)}=\|x_u(b)\|^2$ by condition 4, and $\|x_u(0)\|^2+\|x_u(1)\|^2=\|x_{\emptyset}\|^2$ by condition 5, so expanding $\|x_\emptyset-x_u(0)-x_u(1)\|^2$ gives $0$.
\begin{equation*}
\begin{aligned}
\|x_{S+u}(f\land u=0)\|^2 +\|x_{S+u}(f\land u=1)\|^2 &= \iprod{x_S(f)}{x_u(0)}+\iprod{x_S(f)}{x_u(1)}\\
&= \iprod{x_S(f)}{x_u(0)+x_u(1)}\\
&= \iprod{x_S(f)}{x_\emptyset}=\|x_S(f)\|^2
\end{aligned}
\end{equation*}
\item Notice that $x_{S+u}(f\land u=1)$ and $x_{S+u}(f\land u=0)$ are orthogonal by condition 3. Let $p$ be the projection of $x_S(f)$ onto the plane spanned by these two vectors. Since $\iprod{x_S(f)}{x_{S+u}(f\land u=b)}=\|x_{S+u}(f\land u=b)\|^2$ for $b\in\set{0,1}$ by condition 4, one can verify that $p=x_{S+u}(f\land u=1)+x_{S+u}(f\land u=0)$. Then it remains to show $\|p\|^2=\|x_S(f)\|^2$, which immediately follows from 3 and orthogonality; by Pythagoras $\|x_S(f)-p\|^2=\|x_S(f)\|^2-\|p\|^2=0$, so $x_S(f)=p$.
\end{enumerate}
\end{proof}
Now we are ready to look into \autoref{outlierSDP} with $\las_t$.
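A minimal sketch of what I have in mind (my own tentative formulation, unverified): lift only the indicator variables. Take $w\in\las_t(X)$, identify $\delta_x$ with $\|w_{\set{x}}(1)\|^2$, and keep the embedding vectors $v_x$ and the constraints of \autoref{outlierSDP} verbatim:
\begin{equation*}
\min_{v,w}\ \sum_{x\in X}\|w_{\set{x}}(1)\|^2\quad \text{s.t.}\quad w\in\las_t(X)\ \text{and \autoref{outlierSDP} holds with } \delta_x=\|w_{\set{x}}(1)\|^2.
\end{equation*}
Every integral outlier set still gives a feasible point (via the deterministic solution after the definition), so this should remain a relaxation, while the vectors $w_S(f)$ for $|S|\leq t+1$ provide the pseudo joint distribution on the indicators.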
\section{Better SDP}
In \cite{chawla_composition_2023} the rounding has two steps: first one shows that if there is a $(k,c)$-outlier embedding of $(X,d)$ into $\ell_2$, then \autoref{outlierSDP} with $f(k)=125cH_k$ has a solution with $\sum_x \delta_x=k$; then one solves the SDP and rounds the fractional optimal solution. I think we cannot do better with the same SDP formulation: for fixed $f(k)$ their rounding is essentially tight, and it does not seem easy to improve the $125cH_k$ bound.
... It seems hard to improve the SDP part.
\begin{comment}
Trying the simplest (and the most natural) formulation.
\begin{equation}\label{outlierLAS}
\begin{aligned}
\min_{v,\delta}\quad& & \sum_x \delta_x& & &\\
\text{s.t.}\quad& & (1-\delta_x)(1 - \delta_y) d^2(x,y)\leq \|v_x-v_y\|^2 &\leq (1-\delta_x)(1 - \delta_y)c^2 d^2(x,y) & &\forall x,y\in X\\
& & v_x&\in \R^p & &\forall x\in X
\end{aligned}
\end{equation}
It follows by definition that \autoref{outlierLAS} is a relaxation of the $(k,c)$-outlier embedding problem: if $(X,d)$ admits a $(k,c)$-outlier embedding, then the optimum of \autoref{outlierLAS} is at most $k$.
\subsection{Simple Threshold Rounding}
We try to apply the rounding idea in \cite{chawla_composition_2023}.
Let $\Delta\in (0,1)$ be a threshold to be specified and let the outlier set be $K=\set{x\mid \delta_x \geq \Delta}$.
For the size of $K$ we have $|K|\leq \sum_x \delta_x / \Delta \leq k/\Delta$. To get $|K|=O(k)$ we need $1/\Delta =O(1)$.
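The first inequality is Markov's inequality applied to the $\delta_x$:
\begin{equation*}
\sum_{x\in X}\delta_x\geq \sum_{x\in K}\delta_x\geq |K|\,\Delta.
\end{equation*}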
To bound the distortion we need a scaling factor $\beta$ on the $v_x$ to make sure our embedding is an expansion. Let $\alpha(x)=\beta v_x$ for $\delta_x < \Delta$. For $x,y\notin K$,
\begin{equation*}
\begin{aligned}
\beta^2\|v_x-v_y\|^2 &\geq \beta^2 (1-\delta_x)(1-\delta_y) d^2(x,y)\geq \beta^2(1-\Delta)^2 d^2(x,y),
\end{aligned}
\end{equation*}
where the last inequality uses $\delta_x,\delta_y<\Delta$. So we take $\beta = \frac{1}{1-\Delta}$, which makes the right-hand side $d^2(x,y)$. The other side provides the distortion,
\begin{equation*}
\begin{aligned}
\|\alpha(x)-\alpha(y)\|^2 &\leq c^2 d^2(x,y)(1-\delta_x)(1-\delta_y)\beta^2\\
&\leq \frac{c^2 d^2(x,y)}{(1-\Delta)^2}
\end{aligned}
\end{equation*}
Thus taking $\Delta$ to be any constant gives us an $(O(k),O(c))$-outlier embedding. For an $(O(k),(1+\e)c)$-embedding, let $\frac{1}{(1-\Delta)^2}=1+\e$, \ie{} $\Delta=1-\sqrt{\frac{1}{1+\e}}$. Then one has \[|K|\leq \frac{k}{1-\sqrt{\frac{1}{1+\e}}}\leq \frac{k}{\frac{1}{2}\e-\frac{3}{8}\e^2}.\]
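The last inequality is the binomial series: for $\e\in(0,1]$,
\[1-\frac{1}{\sqrt{1+\e}}=\frac{1}{2}\e-\frac{3}{8}\e^2+\frac{5}{16}\e^3-\dots\geq \frac{1}{2}\e-\frac{3}{8}\e^2,\]
since the series alternates with terms of decreasing magnitude.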
\subsection{Vector Based Rounding}
\end{comment}
\bibliography{ref}
\bibliographystyle{alpha}
\end{document}