Files
EXP3notes/main.tex
2025-12-15 11:59:12 +08:00

32 lines
1.1 KiB
TeX

\documentclass[a4paper,11pt]{article}
\usepackage{chao}
\usepackage{algo}
\geometry{margin=2cm}
\title{Note on ``Fast \textsf{EXP3} Algorithms''}
\author{\zh{丛宇}}
\newcommand{\expt}{\textsf{EXP3}}
\begin{document}
\maketitle
A recent paper shows that the \expt{} algorithm for adversarial bandits can be implemented in $O(1)$ expected time per round; see \url{https://arxiv.org/pdf/2512.11201v1}.
\section{Problem Settings}
The bandit has $K\geq 2$ arms. At round $t\in [T]$, the adversary decides the loss vector $(\ell_{t,1},\dots,\ell_{t,K})\in [0,1]^K$ based on the history of the losses and the player's choices in previous rounds.
Note that the adversary knows the player's algorithm.
Then the player selects one arm $a_t\in [K]$ and observes the loss $\ell_{t,a_t}$.
The player's goal is to minimize the expected pseudo-regret $\bar R_T$ defined as follows.
\[
\bar R_T = \E \left[\sum_{t\in[T]}\ell_{t,a_t}\right]-\min_{i\in [K]}\sum_{t\in[T]}\ell_{t,i}
\]
\subsection{\expt{} Algorithm}
The \expt{} algorithm works like the multiplicative weights update method for linear programs.
\end{document}