Mirror of https://github.com/congyu711/BeamerTheme.git, synced 2025-07-12 00:21:33 +08:00
example 2. lowdim LP slides
This commit is contained in:
parent d507c8ba55
commit 14b79c2624
BIN
LowdimLP.pdf
Normal file
219
LowdimLP.tex
Normal file
@@ -0,0 +1,219 @@
\documentclass{beamer}

\author{Yu Cong}
\title[Minimizing a sum of pwl convex functions]{Minimizing the Sum of Piecewise Linear Convex Functions}
\date{\today}

% \AtBeginSection[]{
% \frame{\frametitle{Outline}\tableofcontents[currentsection,
% subsectionstyle=show/show/shaded]}
% }

\usetheme{Simple}
% \useoutertheme{tree}

\begin{document}
\begin{frame}[plain]
% Print the title page as the first slide
\titlepage
\end{frame}

\begin{frame}[plain]{Plan}
\mybox[oliver!10]{\scriptsize The order of the slides is basically the order in which I thought about this problem.}
\tableofcontents
\end{frame}

\section{Problems \& Definitions}
\begin{frame}{$\min \sum f_i(a_i\cdot x-b_i)$}
\begin{problem}
Given $n$ piecewise linear convex functions $f_1,...,f_n:\R \to \R$ with $m$ breakpoints in total, and $n$ linear functions $a_i\cdot x-b_i:\R^d\to \R$, find $\min_x \sum_i f_i(a_i\cdot x-b_i)$.
\end{problem}
\begin{figure}
    \centering
    \begin{subfigure}{.5\textwidth}
        \centering
        \includegraphics[width=.4\linewidth]{images/Piecewise_linear_function.svg.png}
        \caption{A 1D piecewise linear function with 4 line segments and 3 breakpoints}
        \label{fig:sub1}
    \end{subfigure}%
    \begin{subfigure}{.5\textwidth}
        \centering
        \includegraphics[width=.4\linewidth]{images/Piecewise_linear_function2D.png}
        \caption{A 2D piecewise linear concave function}
        \label{fig:sub2}
    \end{subfigure}
    % \caption{A figure with two subfigures}
    \label{fig:1}
\end{figure}
$f_i(a_i\cdot x-b_i):\R^d\to \R$ is also piecewise linear convex.
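
For example, taking every $f_i(t)=|t|$ (a single breakpoint at $0$) specializes the problem to least absolute deviations:
\[
\min_{x\in\R^d} \sum_{i=1}^n |a_i\cdot x-b_i|.
\]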
\end{frame}

\begin{frame}{General piecewise linear convex function in $\R^d$}
\begin{definition}[piecewise linear convex function in $\R^d$]\label{def:pwlc}
\[
g(x)=\max \{a_1^Tx+b_1,\ldots,a_L^Tx+b_L\}
\]
\end{definition}

Every piecewise linear convex function in $\R^d$ can be expressed in this form.\footnote{S.P. Boyd, L. Vandenberghe, \textbf{Convex optimization}, Cambridge University Press, Cambridge, UK; New York, 2004.}

However, observe that the piecewise linear convex functions in our problem are not that general: each is a composition of a linear mapping with a 1D piecewise linear convex function.
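
Concretely, writing $f_i(t)=\max_j\{\alpha_{i,j}t-\beta_{i,j}\}$ for the line segments of $f_i$, each summand becomes
\[
f_i(a_i\cdot x-b_i)=\max_j\bigl\{(\alpha_{i,j}a_i)\cdot x-(\alpha_{i,j}b_i+\beta_{i,j})\bigr\},
\]
so all of its linear pieces share the common direction $a_i$, whereas a general $g$ as in Definition~\ref{def:pwlc} has no such restriction.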

\end{frame}

\begin{frame}{$f\circ l\not \equiv g$}
\begin{proof}
\small
Consider a piecewise linear convex function $g:\R^2\to \R$. $g$ can be viewed as the pointwise maximum of a set of planes in $\R^3$.

Consider a sequence of points $P=\set{p_1,p_2,...,p_k}$ in the 2D plane. Applying the linear mapping $l$ to $P$ gives a sequence of numbers (points in 1D) $P'=\set{p_1',p_2',...,p_k'}$; assume $P'$ is non-decreasing. The values of $f$ on $P'$ (equivalently, of $f\circ l$ on $P$) always form a unimodal sequence, since $f$ is convex in one dimension. However, the values of $g$ on $P$ need not be unimodal. Thus the composition of a linear mapping with a 1D pwl convex function is not equivalent to a general pwl convex function in higher dimensions.
\end{proof}
\end{frame}

\section{Properties}
\begin{frame}[allowframebreaks]{A linear time algorithm}
\begin{problem}
Given $n$ piecewise linear convex functions $f_1,...,f_n:\R \to \R$ with $m$ breakpoints in total, and $n$ linear functions $a_i\cdot x-b_i:\R^d\to \R$, find $\min_x \sum_i f_i(a_i\cdot x-b_i)$.
\end{problem}
This can be solved in $O(2^{2^d} (m+n))$ time using Megiddo's low-dimensional LP algorithm.\footnote{Nimrod Megiddo. Linear programming in linear time when the dimension is fixed. J. ACM, 31(1):114–127, Jan 1984.}

Let $n_i$ be the number of line segments of $f_i$. Since each $f_i$ has one more segment than it has breakpoints, $\sum_i n_i=m+n$.

We can formulate the optimization problem as the following linear program,
\newpage

\begin{align*}
\min \quad & \sum_{i=1}^n f_i\\
\text{s.t.} \quad & f_i\geq \alpha_{i,j}(a_i\cdot x -b_i)-\beta_{i,j} \qquad \forall i\in[n],\ \forall j
\end{align*}

where $\alpha_{i,j} t -\beta_{i,j}$ is the $j$-th line segment of $f_i$.

There are $m+n$ constraints in total, and $d+n$ variables ($x\in\R^d$ together with the epigraph variables $f_1,\ldots,f_n$).
\end{frame}

\section{LP in Low Dimensions}
\begin{frame}[allowframebreaks]{Megiddo's algorithm}
{\tiny \url{https://people.inf.ethz.ch/gaertner/subdir/texts/own_work/chap50-fin.pdf}}

The dimension $d$ (in our problem, the dimension of $x$) is small while the number of constraints is huge. We need only $d$ linearly independent tight constraints to identify the optimal solution $x^*$.
Thus most of the constraints are useless.
\BlankLine

\textbf{For a single constraint, how can we determine where $x^*$ lies with respect to it?}

Through inquiries. Let $a\cdot x \leq b$ be the constraint. Define three hyperplanes $a\cdot x = c$ with $c\in \set{b,b-\e,b+\e}$, and solve the three resulting $(d-1)$-dimensional linear programs. The largest of the three optimal objective values tells us where $x^*$ lies with respect to the hyperplane.

\newpage
Finding the optimal solution $x^*$ is therefore equivalent to the following problem,
\begin{problem}[Multidimensional Search Problem]
Suppose that there exists a point $x^*$ which is not known to us, but there is an oracle that can tell the position of $x^*$ relative to any hyperplane in $\R^d$. Given $n$ hyperplanes, we want to know the position of $x^*$ relative to each of them.
\end{problem}

\textbf{What about searching in one dimension?} The fastest way is to use the linear-time median algorithm: find the median of the $n$ numbers and call the oracle once to compare it with $x^*$. Thus, with $O(n)$ time for median finding and a single oracle call, we learn the position of $n/2$ of the numbers relative to $x^*$.
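
Repeating this on the undetermined half each time, the total cost is
\[
\sum_{k\ge 0} O\!\Bigl(\frac{n}{2^k}\Bigr)=O(n) \text{ time} \qquad\text{and}\qquad \lceil\log_2 n\rceil \text{ oracle calls}.
\]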

\newpage

If we can do similar things in $\R^d$, i.e., there is a method that makes $A(d)$ oracle calls and determines at least a $B(d)$-fraction of the relative positions, then we can apply this method $\log_{\frac{1}{1-B(d)}} n$ times to find all relative positions.
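
Indeed, each application leaves at most a $(1-B(d))$-fraction of the hyperplanes unresolved, so after $k$ applications at most $(1-B(d))^k n$ remain, and
\[
(1-B(d))^k\, n < 1 \quad\text{once}\quad k > \log_{\frac{1}{1-B(d)}} n.
\]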

Note that in one dimension, $A(1)=1$ and $B(1)=1/2$ (one oracle call comparing $x^*$ with the median). In $\R^d$, our oracle is the recursive inquiry described above.

A trivial method iterates over all hyperplanes and calls the oracle on each one, since there is no \emph{median} of a set of hyperplanes in $\R^d$. The complexity recurrence is
\[T(n,d)=n(3T(n-1,d-1)+O(nd))\]
Note that in this setting $A(d)=1$ and $B(d)=1/n$.

\newpage
Megiddo designed a clever method with $A(d)=2^{d-1}$ and $B(d)=2^{-(2^d-1)}$.

\begin{lemma}
\begin{figure}
    \includegraphics[width=.3\textwidth]{images/1234.pdf}
\end{figure}
Given two lines through the origin with slopes of opposite sign, knowing which quadrant $x^*$ lies in allows us to locate it with respect to at least one of the lines.
\end{lemma}
\newpage
Let $l_H$ be the intersection of hyperplane $H$ with the $x_1x_2$ plane.
Compute a partition $S_1\sqcup S_2=\mathcal H$ of the set $\mathcal H$ of hyperplanes:
$H\in S_1$ iff $l_H$ has positive slope, otherwise $H\in S_2$. We further assume that $|S_1|=|S_2|=n/2$.
\noindent
\begin{minipage}[t]{.5\textwidth}
    \begin{figure}
        \includegraphics[width=.85\textwidth]{images/4l1234.pdf}
    \end{figure}
\end{minipage}% <---------------- Note the use of "%"
\begin{minipage}[t]{.5\textwidth}
    \vspace{25pt}
    Now we have $n/2$ pairs $(H_1,H_2)$, where $H_i\in S_i$. Let $l_i$ be the intersection of $H_i$ with the $x_1x_2$ plane.
    Let $H_{x_i}$ be the linear combination of $H_1$ and $H_2$ such that $x_i$ is eliminated.
\end{minipage}

{
% Now we have $n/2$ pairs $(H_1,H_2)$, where $H_i\in S_i$. Let $l_i$ be the intersection of $H_i$ and $x_1x_2$ plane.
% Let $H_{x_i}$ be the linear combination of $H_1$ and $H_2$ s.t. $x_i$ is eliminated.
By the previous lemma, calling the oracle on $l_{x_1}$ and $l_{x_2}$ (the intersections of $H_{x_1}$ and $H_{x_2}$ with the $x_1x_2$ plane) locates $x^*$ with respect to at least one of $H_1$ and $H_2$.}
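
For concreteness, one valid choice of these combinations (not necessarily the one in Megiddo's paper): if $H_1: u\cdot x=c$ and $H_2: v\cdot x=e$, take
\[
H_{x_1}:\; v_1(u\cdot x-c)-u_1(v\cdot x-e)=0,
\qquad
H_{x_2}:\; v_2(u\cdot x-c)-u_2(v\cdot x-e)=0,
\]
in which the coefficient of $x_1$ (resp.\ $x_2$) vanishes.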
\newpage
Input: $S_1,S_2$ and the pairs.
\begin{enumerate}
    \item Recursively locate $x^*$ with respect to the $n/2$ hyperplanes $H_{x_1}$: $A(d-1)$ oracle calls resolve a $B(d-1)$-fraction of them.
    \item For the resolved pairs, recursively locate $x^*$ with respect to the corresponding hyperplanes $H_{x_2}$, resolving a $B(d-1)$-fraction of them with another $A(d-1)$ oracle calls. By the lemma, every pair with both $H_{x_1}$ and $H_{x_2}$ resolved locates $x^*$ with respect to at least one of $H_1\in S_1$ and $H_2\in S_2$.
    \item There is still a $\bigl(1-{B(d-1)}^2/2\bigr)$-fraction of the hyperplanes whose position relative to $x^*$ is unknown. Run this algorithm on these hyperplanes.
\end{enumerate}
This gives the recurrence
\[
T(n,d)\leq 3\cdot 2^{d-1}T(n,d-1)+T((1-2^{1-2^d})n,d)+O(nd)
\]
with solution $T(n,d)=O(2^{2^d}n)$.
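
As a sanity check on the claimed values: steps 1 and 2 use $A(d)=2A(d-1)$ oracle calls, and a $\tfrac{1}{2}{B(d-1)}^2$-fraction of the original hyperplanes gets resolved per round, so $B(d)=\tfrac{1}{2}{B(d-1)}^2$; with $A(1)=1$ and $B(1)=1/2$ this gives
\[
A(d)=2^{d-1},\qquad B(d)=2^{-(2^d-1)}.
\]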
\end{frame}
\begin{frame}{Zemel's conversion}
Our linear program has \emph{dimension} $n+d$. \href{https://www.sciencedirect.com/science/article/abs/pii/0020019084900140}{Zemel} showed that this kind of problem can be converted to a linear program of dimension $d$.
\begin{align*}
\min \quad & \sum_{i=1}^n f_i\\
\text{s.t.} \quad & f_i\geq \alpha_{i,j}(a_i\cdot x -b_i)-\beta_{i,j} \qquad \forall i\in[n],\ \forall j
\end{align*}
\mybox[oliver!20]{
\scriptsize Here is an intuitive way to understand the conversion. One can think of the LP above as a $d$-dimensional search problem with $n+d$ hyperplanes. However, the oracle is quite different: it takes the unknown $x^*$ and a hyperplane $H$ as input and returns the relative position by computing the minimal $f_i$.
}
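
A short way to see why the $n$ auxiliary variables do not really add dimensions: at any fixed $x$ the optimal value of each $f_i$ is forced, so the LP above is equivalent to the $d$-dimensional problem
\[
\min_{x\in\R^d}\;\sum_{i=1}^n \max_j\bigl\{\alpha_{i,j}(a_i\cdot x-b_i)-\beta_{i,j}\bigr\}.
\]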
\end{frame}

\section{Possible Improvements}

\begin{frame}{Other algorithms for fixed dimension LP}
\begin{figure}
    \centering
    \includegraphics[width=\textwidth]{images/table.png}
    \caption{Algorithms for LP in low dimensions\footnote{table stolen from \url{https://dl.acm.org/doi/10.1145/3155312}}}
\end{figure}
\textbf{Can we use faster fixed-dimension LP algorithms to get a better complexity?}
\end{frame}

\begin{frame}[allowframebreaks]{LP-type problem}
Algorithms for low-dimensional LP actually solve a more abstract problem.
\begin{definition}[LP-type problem]
Given a set $S$ and a function $f:2^S\to \R$ satisfying two properties:%
\begin{itemize}
    \item Monotonicity: $\forall A\subseteq B\subseteq S, f(A)\leq f(B)\leq f(S)$.
    \item Locality: $\forall A\subseteq B\subseteq S$ and $\forall x\in S$, if $f(A) = f(B) = f(A \cup \{x\})$, then $f(A) = f(B \cup \{x\})$.
\end{itemize}
\end{definition}
Linear programs (minimization) are LP-type problems: take $S$ to be the set of constraints and $f(A)$ to be the optimal value of the LP restricted to the constraints in $A$.

$B\subseteq S$ is a basis if $\forall B'\subsetneq B, f(B')<f(B)$. A set of `useful' constraints in a linear program is a basis.

The combinatorial dimension is the size of the largest basis.

If an LP has low dimension, then its combinatorial dimension is low. \textbf{What about the converse?}
\newpage
\textbf{Does our LP have low combinatorial dimension?}

No! A basis must contain at least one constraint for each $f_i$ (otherwise that $f_i$ is unbounded below), so it contains at least $n$ constraints.
\end{frame}



\begin{frame}{Aggregate the pwl convex functions}
% blog posts
\end{frame}

\end{document}
BIN
images/1234.pdf
Normal file
BIN
images/2dlp_1.png
Normal file
BIN
images/2dlp_2.png
Normal file
BIN
images/2dlp_3.png
Normal file
BIN
images/4l1234.pdf
Normal file
BIN
images/Piecewise_linear_function.svg.png
Normal file
BIN
images/Piecewise_linear_function2D.png
Normal file
BIN
images/table.png
Normal file
26
images/tikz.tex
Normal file
@@ -0,0 +1,26 @@
\documentclass[tikz, border=10pt]{standalone}

\begin{document}

\begin{tikzpicture}
    % Draw the two axes l_{x_1} and l_{x_2}
    \draw[] (-3,0) -- (3,0); % horizontal axis, labeled $l_{x_2}$
    \draw[] (0,-3) -- (0,3); % vertical axis, labeled $l_{x_1}$

    % Draw the two diagonal lines l_1 and l_2
    \draw[thick] (-3,-1) -- (3,1);  % diagonal line with positive slope, labeled $l_1$
    \draw[thick] (-3,1) -- (3,-1);  % diagonal line with negative slope, labeled $l_2$

    % Add labels
    \node at (2.5,0.5) {$l_1$};  % label for $l_1$
    \node at (2.5,-0.5) {$l_2$}; % label for $l_2$
    \node at (3.3,0) {$l_{x_2}$};
    \node at (0,3.3) {$l_{x_1}$};
    % Quadrant numbers
    \node at (1,1.5) {1};
    \node at (-1,1.5) {2};
    \node at (-1,-1.5) {3};
    \node at (1,-1.5) {4};
\end{tikzpicture}

\end{document}