first commit

2025-12-15 11:59:12 +08:00
commit 281c68fa2e
5 changed files with 914 additions and 0 deletions
--- a/main.tex
+++ b/main.tex
@@ -0,0 +1,31 @@
+\documentclass[a4paper,11pt]{article}
+\usepackage{chao}
+\usepackage{algo}
+\geometry{margin=2cm}
+
+\title{Note on ``Fast \textsf{EXP3} Algorithms''}
+\author{\zh{丛宇}}
+
+\newcommand{\expt}{\textsf{EXP3}}
+
+\begin{document}
+\maketitle
+A recent paper shows that the \expt{} algorithm for adversarial bandits can be implemented in $O(1)$ expected time per round; see \url{https://arxiv.org/pdf/2512.11201v1}.
+
+\section{Problem Settings}
+
+The bandit has $K\geq 2$ arms. At round $t\in [T]$, the adversary decides the loss vector $(\ell_{t,1},\dots,\ell_{t,K})\in [0,1]^K$ based on the history of the losses and the player's choices in previous rounds.
+Note that the adversary knows the player's algorithm.
+Then the player selects one arm $a_t\in [K]$ and observes the loss $\ell_{t,a_t}$.
+The player's goal is to minimize the expected pseudo-regret $\bar R_T$ defined as follows.
+\[
+\bar R_T = \E \left[\sum_{t\in[T]}\ell_{t,a_t}\right]-\min_{i\in [K]}\sum_{t\in[T]}\ell_{t,i}
+\]
+
+\subsection{\expt{} Algorithm}
+
+The \expt{} algorithm works like the multiplicative weights update method for linear programs.
+
+
+
+\end{document}