first commit

This commit is contained in:
2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
# Lezer-LaTeX, a LaTeX Parser
Lezer-LaTeX is a LaTeX parser implemented with [lezer](https://lezer.codemirror.net/), the parser system used by [CodeMirror 6](https://codemirror.net/6/).
The parser is written in a "grammar" file (and a "tokens" file with custom tokenizer logic), which is then compiled by `@lezer/generator` into a parser module and a "terms" module. The parser module is then loaded by CodeMirror 6 in the web frontend codebase.
## Important files
- Source files:
- `./latex.grammar`: The grammar file, containing the specification for the parser
- `./tokens.mjs`: The custom tokenizer logic, required by some rules in the grammar
- Generated files:
- `./latex.mjs`: The generated parser
- `./latex.terms.mjs`: The generated terms file
- (these files are ignored by git, eslint, and prettier)
- Scripts:
- `web/scripts/lezer-latex/generate.js`: A script which runs the generator on the grammar, producing the generated parser/terms files
- `web/scripts/lezer-latex/run.mjs`: A script that runs the parser against a supplied file, and prints the tree to the terminal
- Webpack plugins:
- `web/webpack-plugins/lezer-grammar-compiler.js`: A webpack plugin that calls the generator as part of the webpack build. In dev, it will automatically re-build the parser when the grammar file changes.
## NPM tasks
- `lezer-latex:generate`: Generate the parser files from the grammar
- (Calls `lezer-latex/generate.js`)
- This should be run whenever the grammar changes
- `lezer-latex:run`: Run the parser against a file
- (Calls `lezer-latex/run.mjs`)
### Generating the parser
From the monorepo root:
``` sh
bin/npm -w services/web run 'lezer-latex:generate'
```
## Tests
Unit tests for the parser live in `web/test/unit/src/LezerLatex`. There are three kinds of test, in three subdirectories:
- `corpus/`: A set of tests using lezer's test framework, consisting of example text and the expected parse tree
- `examples/`: A set of realistic LaTeX documents. These tests pass if the files parse with no errors
- `regressions/`: Like `examples/`, these are expected to parse without error, but they are not realistic documents.
These tests run as part of `test_frontend`. You can run these tests alone by invoking:
``` sh
make test_unit MOCHA_GREP='lezer-latex'
```
## Trying the parser
While developing the parser, you can run it against a file by calling the `lezer-latex:run` task. There are
some example files in the test suite, at `web/test/unit/src/LezerLatex/examples/`.
For example:
``` sh
bin/npm -w services/web run 'lezer-latex:run' web/test/unit/src/LezerLatex/examples/amsmath.tex
```
If you omit the file path, the default file (`examples/demo.tex`) will be run.
## Integration into web
The web frontend imports the parser (from `latex.mjs`), in `frontend/js/features/source-editor/languages/latex/index.ts`.
The parser is then plugged in to the CM6 language system.
### The web build
In `web/Dockerfile`, we have a `RUN` command that calls `lezer-latex:generate` as part of the build. This is necessary to ensure the parser is built before the CI tests run (notably: we can't do the build during the tests, because we can't write to disk during that stage of CI).

View File

@@ -0,0 +1,829 @@
// Track environments
@context elementContext from "./tokens.mjs"
// External tokens must be defined before normal @tokens to take precedence
// over them.
@external tokens verbTokenizer from "./tokens.mjs" {
VerbContent
}
@external tokens lstinlineTokenizer from "./tokens.mjs" {
LstInlineContent
}
@external tokens literalArgTokenizer from "./tokens.mjs" {
LiteralArgContent
}
@external tokens spaceDelimitedLiteralArgTokenizer from "./tokens.mjs" {
SpaceDelimitedLiteralArgContent
}
@external tokens verbatimTokenizer from "./tokens.mjs" {
VerbatimContent
}
// external tokenizer to read control sequence names including @ signs
// (which are often used in TeX definitions).
@external tokens csnameTokenizer from "./tokens.mjs" {
Csname
}
@external tokens trailingContentTokenizer from "./tokens.mjs" {
TrailingWhitespaceOnly,
TrailingContent
}
// It doesn't seem to be possible to access specialized tokens in the context tracker.
// They have id's which are not exported in the latex.terms.js file.
// This is a workaround: use an external specializer to explicitly choose the terms
// to use for the specialized tokens.
@external specialize {CtrlSeq} specializeCtrlSeq from "./tokens.mjs" {
Begin,
End,
RefCtrlSeq,
RefStarrableCtrlSeq,
CiteCtrlSeq,
CiteStarrableCtrlSeq,
LabelCtrlSeq,
MathTextCtrlSeq,
HboxCtrlSeq,
TitleCtrlSeq,
DocumentClassCtrlSeq,
UsePackageCtrlSeq,
HrefCtrlSeq,
UrlCtrlSeq,
VerbCtrlSeq,
LstInlineCtrlSeq,
IncludeGraphicsCtrlSeq,
CaptionCtrlSeq,
DefCtrlSeq,
LetCtrlSeq,
LeftCtrlSeq,
RightCtrlSeq,
NewCommandCtrlSeq,
RenewCommandCtrlSeq,
NewEnvironmentCtrlSeq,
RenewEnvironmentCtrlSeq,
// services/web/frontend/js/features/outline/outline-parser.js
BookCtrlSeq,
PartCtrlSeq,
ChapterCtrlSeq,
SectionCtrlSeq,
SubSectionCtrlSeq,
SubSubSectionCtrlSeq,
ParagraphCtrlSeq,
SubParagraphCtrlSeq,
InputCtrlSeq,
IncludeCtrlSeq,
ItemCtrlSeq,
NewTheoremCtrlSeq,
TheoremStyleCtrlSeq,
CenteringCtrlSeq,
BibliographyCtrlSeq,
BibliographyStyleCtrlSeq,
AuthorCtrlSeq,
AffilCtrlSeq,
AffiliationCtrlSeq,
DateCtrlSeq,
MaketitleCtrlSeq,
TextColorCtrlSeq,
ColorBoxCtrlSeq,
HLineCtrlSeq,
TopRuleCtrlSeq,
MidRuleCtrlSeq,
BottomRuleCtrlSeq,
MultiColumnCtrlSeq,
ParBoxCtrlSeq,
TextBoldCtrlSeq,
TextItalicCtrlSeq,
TextSmallCapsCtrlSeq,
TextTeletypeCtrlSeq,
TextMediumCtrlSeq,
TextSansSerifCtrlSeq,
TextSuperscriptCtrlSeq,
TextSubscriptCtrlSeq,
TextStrikeOutCtrlSeq,
EmphasisCtrlSeq,
UnderlineCtrlSeq,
SetLengthCtrlSeq
}
@external specialize {EnvName} specializeEnvName from "./tokens.mjs" {
DocumentEnvName,
TabularEnvName,
EquationEnvName,
EquationArrayEnvName,
VerbatimEnvName,
TikzPictureEnvName,
FigureEnvName,
ListEnvName,
TableEnvName
}
@external specialize {CtrlSym} specializeCtrlSym from "./tokens.mjs" {
OpenParenCtrlSym,
CloseParenCtrlSym,
OpenBracketCtrlSym,
CloseBracketCtrlSym,
LineBreakCtrlSym
}
@tokens {
  // Control sequences: a backslash followed by one or more letters (\foo)...
  CtrlSeq { "\\" $[a-zA-Z]+ }
  // ...or a backslash followed by a single non-letter (\%, \\, \$, ...)
  CtrlSym { "\\" ![a-zA-Z] }
  // tokens for paragraphs
  Whitespace { $[ \t]+ }
  NewLine { "\n" }
  // Two or more consecutive newlines form a paragraph break
  BlankLine { "\n" "\n"+ }
  // everything is normal text, except these characters
  // (note: only the FIRST character also excludes ' ', so a Normal run may
  // contain embedded spaces but cannot start with one — presumably so that
  // Whitespace wins at a word boundary; confirm intended)
  Normal { ![\\{}\[\]$&~#^_% \t\n] ![\\{}\[\]$&~#^_%\t\n]* }
  // More specific tokens take precedence over Normal text
  @precedence { CtrlSeq, CtrlSym, BlankLine, NewLine, Whitespace, Normal }
  OpenBrace[closedBy=CloseBrace] { "{" }
  CloseBrace[openedBy=OpenBrace] { "}" }
  OpenBracket[closedBy=CloseBracket] { "[" }
  CloseBracket[openedBy=OpenBracket] { "]" }
  // A comment runs to (and includes) the end of the line
  Comment { "%" ![\n]* "\n"? }
  Dollar { "$" }
  Number { $[0-9]+ ("." $[0-9]*)? }
  MathSpecialChar { $[^_=<>()\-+/*]+ } // FIXME not all of these are special
  MathChar { ![0-9^_=<>()\-+/*\\{}\[\]$%&~ \t\n]+ }
  @precedence { Number, MathSpecialChar, MathChar }
  Ampersand { "&" }
  Tilde { "~" }
  // Environment names: letters with an optional trailing star (e.g. align*).
  // NOTE(review): the external envNameAfter() in tokens.mjs also accepts
  // digits, '+' and '@' — this token is narrower; confirm intended.
  EnvName { $[a-zA-Z]+ $[*]? }
}
@top LaTeX {
Text
}
@skip { Comment }
// TEXT MODE
optionalWhitespace {
!argument Whitespace
}
OptionalArgument {
!argument OpenBracket ShortOptionalArg CloseBracket
}
TextArgument {
!argument OpenBrace LongArg CloseBrace
}
SectioningArgument {
!argument OpenBrace LongArg CloseBrace
}
LabelArgument {
!argument ShortTextArgument
}
RefArgument {
!argument ShortTextArgument
}
BibKeyArgument {
!argument ShortTextArgument
}
PackageArgument {
!argument ShortTextArgument
}
TabularArgument {
!argument OpenBrace TabularContent CloseBrace
}
UrlArgument {
OpenBrace LiteralArgContent CloseBrace
}
FilePathArgument {
OpenBrace LiteralArgContent CloseBrace
}
BareFilePathArgument {
Whitespace SpaceDelimitedLiteralArgContent
}
DefinitionArgument {
!argument NewLine? Whitespace* OpenBrace DefinitionFragment? CloseBrace
}
MacroParameter {
"#" ("1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9")
}
OptionalMacroParameter {
OpenBracket MacroParameter CloseBracket
}
// The autocompletion code in services/web/frontend/js/features/source-editor/utils/tree-operations/commands.ts
// depends on following the `KnownCommand { Command { CommandCtrlSeq [args] } }`
// structure
KnownCommand<ArgumentType> {
Title {
TitleCtrlSeq optionalWhitespace? OptionalArgument? TextArgument
} |
Author {
AuthorCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
} |
Affil {
AffilCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
} |
Affiliation {
AffiliationCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
} |
Date {
DateCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? ShortTextArgument
} |
DocumentClass {
DocumentClassCtrlSeq optionalWhitespace? OptionalArgument?
DocumentClassArgument { ShortTextArgument }
} |
BibliographyCommand {
BibliographyCtrlSeq optionalWhitespace?
BibliographyArgument { ShortTextArgument }
} |
BibliographyStyleCommand {
BibliographyStyleCtrlSeq optionalWhitespace?
BibliographyStyleArgument { ShortTextArgument }
} |
UsePackage {
UsePackageCtrlSeq optionalWhitespace? OptionalArgument?
PackageArgument
} |
TextColorCommand {
TextColorCtrlSeq optionalWhitespace? ShortTextArgument optionalWhitespace? ArgumentType
} |
ColorBoxCommand {
ColorBoxCtrlSeq optionalWhitespace? ShortTextArgument optionalWhitespace? ArgumentType
} |
HrefCommand {
HrefCtrlSeq optionalWhitespace? UrlArgument ShortTextArgument
} |
NewTheoremCommand {
NewTheoremCtrlSeq "*"? optionalWhitespace? ShortTextArgument ((OptionalArgument? TextArgument) | (TextArgument OptionalArgument))
} |
TheoremStyleCommand {
TheoremStyleCtrlSeq optionalWhitespace? ShortTextArgument
} |
UrlCommand {
UrlCtrlSeq optionalWhitespace? UrlArgument
} |
VerbCommand {
VerbCtrlSeq VerbContent
} |
LstInlineCommand {
LstInlineCtrlSeq optionalWhitespace? OptionalArgument? LstInlineContent
} |
IncludeGraphics {
IncludeGraphicsCtrlSeq optionalWhitespace? OptionalArgument?
IncludeGraphicsArgument { FilePathArgument }
} |
Caption {
CaptionCtrlSeq "*"? optionalWhitespace? OptionalArgument? TextArgument
} |
Label {
LabelCtrlSeq optionalWhitespace? LabelArgument
} |
Ref {
(RefCtrlSeq | RefStarrableCtrlSeq "*"?) optionalWhitespace? OptionalArgument? optionalWhitespace? OptionalArgument? optionalWhitespace? RefArgument
} |
Cite {
(CiteCtrlSeq | CiteStarrableCtrlSeq "*"?) optionalWhitespace? OptionalArgument? optionalWhitespace? OptionalArgument? optionalWhitespace? BibKeyArgument
} |
Def {
// allow more general Csname argument to \def commands, since other symbols such as '@' are often used in definitions
DefCtrlSeq optionalWhitespace? (Csname | CtrlSym) optionalWhitespace? (MacroParameter | OptionalMacroParameter)* optionalWhitespace? DefinitionArgument
} |
Let {
LetCtrlSeq Csname optionalWhitespace? "="? optionalWhitespace? Csname
} |
Hbox {
HboxCtrlSeq optionalWhitespace? TextArgument
} |
NewCommand {
NewCommandCtrlSeq optionalWhitespace?
(Csname | OpenBrace LiteralArgContent CloseBrace)
(OptionalArgument)*
DefinitionArgument
} |
RenewCommand {
RenewCommandCtrlSeq optionalWhitespace?
(Csname | OpenBrace LiteralArgContent CloseBrace)
(OptionalArgument)*
DefinitionArgument
} |
NewEnvironment {
NewEnvironmentCtrlSeq optionalWhitespace?
(OpenBrace LiteralArgContent CloseBrace)
(OptionalArgument)*
DefinitionArgument
DefinitionArgument
} |
RenewEnvironment {
RenewEnvironmentCtrlSeq optionalWhitespace?
(Csname | OpenBrace LiteralArgContent CloseBrace)
(OptionalArgument)*
DefinitionArgument
DefinitionArgument
} |
Input {
InputCtrlSeq InputArgument { ( FilePathArgument | BareFilePathArgument ) }
} |
Include {
IncludeCtrlSeq IncludeArgument { FilePathArgument }
} |
Centering {
CenteringCtrlSeq
} |
Item {
ItemCtrlSeq OptionalArgument? optionalWhitespace?
} |
Maketitle {
MaketitleCtrlSeq optionalWhitespace?
} |
HorizontalLine {
(HLineCtrlSeq | TopRuleCtrlSeq | MidRuleCtrlSeq | BottomRuleCtrlSeq) optionalWhitespace?
} |
MultiColumn {
MultiColumnCtrlSeq
optionalWhitespace? SpanArgument { ShortTextArgument }
optionalWhitespace? ColumnArgument { ShortTextArgument }
optionalWhitespace? TabularArgument
} |
MathTextCommand {
MathTextCtrlSeq optionalWhitespace? "*"? TextArgument
} |
ParBoxCommand {
ParBoxCtrlSeq
(optionalWhitespace? OptionalArgument)*
ShortTextArgument
optionalWhitespace? TextArgument
} |
TextBoldCommand {
TextBoldCtrlSeq TextArgument
} |
TextItalicCommand {
TextItalicCtrlSeq TextArgument
} |
TextSmallCapsCommand {
TextSmallCapsCtrlSeq TextArgument
} |
TextTeletypeCommand {
TextTeletypeCtrlSeq TextArgument
} |
TextMediumCommand {
TextMediumCtrlSeq TextArgument
} |
TextSansSerifCommand {
TextSansSerifCtrlSeq TextArgument
} |
TextSuperscriptCommand {
TextSuperscriptCtrlSeq TextArgument
} |
TextSubscriptCommand {
TextSubscriptCtrlSeq TextArgument
} |
StrikeOutCommand {
TextStrikeOutCtrlSeq ArgumentType
} |
EmphasisCommand {
EmphasisCtrlSeq ArgumentType
} |
UnderlineCommand {
UnderlineCtrlSeq ArgumentType
} |
SetLengthCommand {
SetLengthCtrlSeq optionalWhitespace? ShortTextArgument optionalWhitespace? ShortTextArgument
}
}
UnknownCommand {
(CtrlSeq !argument Whitespace (OptionalArgument | TextArgument)+)
| (CtrlSeq (OptionalArgument | TextArgument)+)
| CtrlSeq Whitespace?
| CtrlSym
}
Command {
KnownCommand<TextArgument>
| UnknownCommand
| KnownCtrlSym
// Not technically allowed in normal mode, but not worth failing the parse over
| LeftCtrlSeq
| RightCtrlSeq
}
KnownCtrlSym {
LineBreak {
LineBreakCtrlSym OptionalArgument?
}
}
textBase {
( Command
| DollarMath
| BracketMath
| ParenMath
| NewLine
| Normal
| Whitespace
| Ampersand
| Tilde
)
}
textWithBrackets {
( textBase
| OpenBracket
| CloseBracket
)
}
textWithEnvironmentsAndBlankLines {
( BlankLine
| KnownEnvironment
| Environment
| textWithBrackets
)
}
textWithGroupsEnvironmentsAndBlankLines {
textWithEnvironmentsAndBlankLines
| Group<Text>
}
Content<Element> {
Element
}
SectioningCommand<Command> {
Command optionalWhitespace? "*"? optionalWhitespace? OptionalArgument? optionalWhitespace? SectioningArgument
}
documentSection<Command, Next> {
SectioningCommand<Command> Content<(sectionText | !section Next)*>
}
Book[@isGroup="$Section"] { documentSection<BookCtrlSeq, Part | Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
Part[@isGroup="$Section"] { documentSection<PartCtrlSeq, Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
Chapter[@isGroup="$Section"] { documentSection<ChapterCtrlSeq, Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
Section[@isGroup="$Section"] { documentSection<SectionCtrlSeq, SubSection | SubSubSection | Paragraph | SubParagraph> }
SubSection[@isGroup="$Section"] { documentSection<SubSectionCtrlSeq, SubSubSection | Paragraph | SubParagraph> }
SubSubSection[@isGroup="$Section"] { documentSection<SubSubSectionCtrlSeq, Paragraph | SubParagraph> }
Paragraph[@isGroup="$Section"] { documentSection<ParagraphCtrlSeq, SubParagraph> }
SubParagraph[@isGroup="$Section"] { SectioningCommand<SubParagraphCtrlSeq> Content<sectionText*> }
sectioningCommand {
Book | Part | Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph
}
sectionText {
!section (
textWithGroupsEnvironmentsAndBlankLines
)+
}
Text {
( sectionText
| sectioningCommand)+
}
LongArg {
( textWithBrackets
| NonEmptyGroup<LongArg>
| KnownEnvironment
| Environment
| BlankLine
| "#" // macro character
| "_" | "^" // other math chars
)*
}
ShortTextArgument {
OpenBrace ShortArg CloseBrace
}
ShortArg {
( textWithBrackets
| NonEmptyGroup<ShortArg>
| "#" // macro character
| "_" | "^" // other math chars
)*
}
ShortOptionalArg {
( textBase
| NonEmptyGroup<ShortOptionalArg>
| "#" // macro character
| "_" // underscore is used in some parameter names
)*
}
TikzPictureContent { /// same as Text but with added allowed characters
( textWithEnvironmentsAndBlankLines
| NonEmptyGroup<TikzPictureContent>
| "#" // macro character
| "_" | "^" // other math chars
)+
}
DefinitionFragment {
( DefinitionFragmentCommand
| Begin
| End
| Group<DefinitionFragment>
| Dollar
| OpenParenCtrlSym
| CloseParenCtrlSym
| OpenBracketCtrlSym
| CloseBracketCtrlSym
| LeftCtrlSeq
| RightCtrlSeq
| BlankLine
| NewLine
| Normal
| Whitespace
| OpenBracket
| CloseBracket
| "#" // macro character
| Ampersand // for tables
| Tilde // unbreakable space
| "_" | "^" // other math chars
| SectioningCommand<
BookCtrlSeq |
PartCtrlSeq |
ChapterCtrlSeq |
SectionCtrlSeq |
SubSectionCtrlSeq |
SubSubSectionCtrlSeq |
ParagraphCtrlSeq |
SubParagraphCtrlSeq
>
)+
}
DefinitionFragmentArgument {
OpenBrace DefinitionFragment? CloseBrace
}
DefinitionFragmentCommand {
KnownCommand<TextArgument>
| DefinitionFragmentUnknownCommand { genericUnknownCommandWithOptionalArguments<DefinitionFragmentArgument, OptionalArgument> }
| KnownCtrlSym
}
KnownEnvironment {
( DocumentEnvironment
| TabularEnvironment
| EquationEnvironment
| EquationArrayEnvironment
| VerbatimEnvironment
| TikzPictureEnvironment
| FigureEnvironment
| ListEnvironment
| TableEnvironment
)
}
BeginEnv<name> {
Begin
EnvNameGroup<name>
OptionalArgument?
(!argument TextArgument)*
}
EndEnv<name> {
End
EnvNameGroup<name>
}
DocumentEnvironment[@isGroup="$Environment"] {
BeginEnv<DocumentEnvName>
Content<Text>
EndEnv<DocumentEnvName>
(TrailingWhitespaceOnly | TrailingContent)?
}
TabularContent {
(textWithGroupsEnvironmentsAndBlankLines)*
}
TabularEnvironment[@isGroup="$Environment"] {
BeginEnv<TabularEnvName>
Content<TabularContent>
EndEnv<TabularEnvName>
}
TableEnvironment[@isGroup="$Environment"] {
BeginEnv<TableEnvName>
Content<Text>
EndEnv<TableEnvName>
}
EquationEnvironment[@isGroup="$Environment"] {
BeginEnv<EquationEnvName>
Content<Math?>
EndEnv<EquationEnvName>
}
EquationArrayEnvironment[@isGroup="$Environment"] {
BeginEnv<EquationArrayEnvName>
Content<Math?>
EndEnv<EquationArrayEnvName>
}
VerbatimEnvironment[@isGroup="$Environment"] {
BeginEnv<VerbatimEnvName>
Content<VerbatimContent>
EndEnv<VerbatimEnvName>
}
TikzPictureEnvironment[@isGroup="$Environment"] {
BeginEnv<TikzPictureEnvName>
Content<TikzPictureContent>
EndEnv<TikzPictureEnvName>
}
FigureEnvironment[@isGroup="$Environment"] {
BeginEnv<FigureEnvName>
Content<Text>
EndEnv<FigureEnvName>
}
ListEnvironment[@isGroup="$Environment"] {
BeginEnv<ListEnvName>
Content<Text>
EndEnv<ListEnvName>
}
EnvNameGroup<name> {
OpenBrace name CloseBrace
}
Environment[@isGroup="$Environment"] {
BeginEnv<EnvName?>
Content<Text>
EndEnv<EnvName?>
}
Group<GroupContent> {
OpenBrace GroupContent? CloseBrace
}
NonEmptyGroup<GroupContent> {
OpenBrace GroupContent CloseBrace
}
/// MATH MODE
DollarMath[@isGroup="$MathContainer"] {
Dollar (InlineMath | DisplayMath) Dollar
}
InlineMath {
Math
}
DisplayMath {
Dollar Math? Dollar
}
OpenParenMath[closedBy=CloseParenMath] {
OpenParenCtrlSym
}
CloseParenMath[openedBy=OpenParenMath] {
CloseParenCtrlSym
}
// alternative syntax \( math \) for inline math, it is the same as $ math $
ParenMath[@isGroup="$MathContainer"] {
OpenParenMath
Math?
CloseParenMath
}
OpenBracketMath[closedBy=CloseBracketMath] {
OpenBracketCtrlSym
}
CloseBracketMath[openedBy=OpenBracketMath] {
CloseBracketCtrlSym
}
// alternative syntax \[ math \] for display math, it is the same as $$ math $$
BracketMath[@isGroup="$MathContainer"] {
OpenBracketMath
Math?
CloseBracketMath
}
// FIXME: we should have separate math modes for inline and display math,
// because display math can contain blank lines while inline math cannot.
Math {
( MathCommand
| Group<Math>
| MathDelimitedGroup
| MathSpecialChar
| Number
| NewLine
| Whitespace
| KnownEnvironment
| Environment
| MathChar
| OpenBracket
| CloseBracket
| Ampersand
| Tilde
)+
}
MathCommand {
KnownCommand<MathArgument>
| MathUnknownCommand { genericUnknownCommand<MathArgument> }
| KnownCtrlSym
}
@external tokens argumentListTokenizer from "./tokens.mjs" {
hasMoreArguments,
endOfArguments
}
@external tokens argumentListWithOptionalTokenizer from "./tokens.mjs" {
hasMoreArgumentsOrOptionals,
endOfArgumentsAndOptionals
}
genericUnknownCommand<ArgumentType> {
CtrlSeq (hasMoreArguments optionalWhitespace? ArgumentType)* endOfArguments
| CtrlSym
}
genericUnknownCommandWithOptionalArguments<ArgumentType, OptionalArgumentType> {
CtrlSeq (hasMoreArgumentsOrOptionals optionalWhitespace? (ArgumentType | OptionalArgumentType))* endOfArgumentsAndOptionals
| CtrlSym
}
MathArgument {
OpenBrace Math? CloseBrace
}
MathDelimitedGroup {
MathOpening Math? MathClosing
}
// FIXME: we have the same problem with specialize on \left,\right as the delimiters
MathOpening {
LeftCtrlSeq optionalWhitespace? MathDelimiter
}
MathClosing {
RightCtrlSeq optionalWhitespace? MathDelimiter
}
MathDelimiter {
// Allowed delimiters, from the LaTeX manual, table 3.10
"/" | "|" | "(" | ")" | "[" | "]" |
"\\{" | "\\}" | "\\|" |
"\\lfloor" | "\\rfloor" |
"\\lceil" | "\\rceil" |
"\\langle" | "\\rangle" |
"\\backslash" | "\\uparrow" |
"\\Uparrow" | "\\Downarrow" |
"\\updownarrow" | "\\Updownarrow" |
"\\downarrow" | "\\lvert" |
"\\lVert" | "\\rVert" |
"\\rvert" | "\\vert" | "\\Vert" |
"\\lbrace" | "\\rbrace" |
"\\lbrack" | "\\rbrack" |
// Also allow the empty match
"."
}
// NOTE: precedence works differently for rules and tokens: in a rule body
// you reference a specifier (!section, !argument) which must be declared
// in this @precedence block.
@precedence {
  section @left, // sectioning commands nest left-associatively
  argument @left // make CtrlSeq arguments left associative
}

View File

@@ -0,0 +1,747 @@
/* Hand-written tokenizer for LaTeX. */
import { ExternalTokenizer, ContextTracker } from '@lezer/lr'
import {
LiteralArgContent,
SpaceDelimitedLiteralArgContent,
VerbContent,
VerbatimContent,
LstInlineContent,
Begin,
End,
KnownEnvironment,
Csname,
TrailingWhitespaceOnly,
TrailingContent,
RefCtrlSeq,
RefStarrableCtrlSeq,
CiteCtrlSeq,
CiteStarrableCtrlSeq,
LabelCtrlSeq,
MathTextCtrlSeq,
HboxCtrlSeq,
TitleCtrlSeq,
AuthorCtrlSeq,
AffilCtrlSeq,
AffiliationCtrlSeq,
DateCtrlSeq,
DocumentClassCtrlSeq,
UsePackageCtrlSeq,
HrefCtrlSeq,
UrlCtrlSeq,
VerbCtrlSeq,
LstInlineCtrlSeq,
IncludeGraphicsCtrlSeq,
CaptionCtrlSeq,
DefCtrlSeq,
LetCtrlSeq,
LeftCtrlSeq,
RightCtrlSeq,
NewCommandCtrlSeq,
RenewCommandCtrlSeq,
NewEnvironmentCtrlSeq,
RenewEnvironmentCtrlSeq,
DocumentEnvName,
TabularEnvName,
EquationEnvName,
EquationArrayEnvName,
VerbatimEnvName,
TikzPictureEnvName,
FigureEnvName,
OpenParenCtrlSym,
CloseParenCtrlSym,
OpenBracketCtrlSym,
CloseBracketCtrlSym,
LineBreakCtrlSym,
// Sectioning commands
BookCtrlSeq,
PartCtrlSeq,
ChapterCtrlSeq,
SectionCtrlSeq,
SubSectionCtrlSeq,
SubSubSectionCtrlSeq,
ParagraphCtrlSeq,
SubParagraphCtrlSeq,
InputCtrlSeq,
IncludeCtrlSeq,
ItemCtrlSeq,
NewTheoremCtrlSeq,
TheoremStyleCtrlSeq,
BibliographyCtrlSeq,
BibliographyStyleCtrlSeq,
CenteringCtrlSeq,
ListEnvName,
MaketitleCtrlSeq,
TextColorCtrlSeq,
ColorBoxCtrlSeq,
HLineCtrlSeq,
TopRuleCtrlSeq,
MidRuleCtrlSeq,
BottomRuleCtrlSeq,
TableEnvName,
MultiColumnCtrlSeq,
ParBoxCtrlSeq,
// Marker for end of argument lists
endOfArguments,
hasMoreArguments,
hasMoreArgumentsOrOptionals,
endOfArgumentsAndOptionals,
TextBoldCtrlSeq,
TextItalicCtrlSeq,
TextSmallCapsCtrlSeq,
TextTeletypeCtrlSeq,
TextMediumCtrlSeq,
TextSansSerifCtrlSeq,
TextSuperscriptCtrlSeq,
TextSubscriptCtrlSeq,
TextStrikeOutCtrlSeq,
EmphasisCtrlSeq,
UnderlineCtrlSeq,
SetLengthCtrlSeq,
} from './latex.terms.mjs'
// Upper bound on how far the argument-list tokenizers scan past blanks
// when deciding whether another argument follows.
const MAX_ARGUMENT_LOOKAHEAD = 100

// True for characters accepted in environment names:
// A-Z, a-z, 0-9, '*', '+' and '@'.
function nameChar(ch) {
  const isUpper = ch >= 65 && ch <= 90
  const isLower = ch >= 97 && ch <= 122
  const isDigit = ch >= 48 && ch <= 57
  return isUpper || isLower || isDigit || ch === 42 || ch === 43 || ch === 64
}

// True for ASCII letters only, i.e. [a-zA-Z].
function alphaChar(ch) {
  const isUpper = ch >= 65 && ch <= 90
  const isLower = ch >= 97 && ch <= 122
  return isUpper || isLower
}

// One-entry cache: the parser probes the same position repeatedly while
// exploring alternatives, so remember the last (input, position) lookup.
let cachedName = null
let cachedInput = null
let cachedPos = 0

// Reads the brace-wrapped environment name found `offset` characters ahead
// of the current input position. Returns the name string, null when the
// braces are empty, or undefined when there is no '{' at that offset.
function envNameAfter(input, offset) {
  const pos = input.pos + offset
  if (cachedInput === input && cachedPos === pos) {
    return cachedName
  }
  if (input.peek(offset) !== '{'.charCodeAt(0)) return
  let name = ''
  let i = offset + 1
  for (;;) {
    const code = input.peek(i)
    if (!nameChar(code)) break
    name += String.fromCharCode(code)
    i += 1
  }
  cachedInput = input
  cachedPos = pos
  cachedName = name === '' ? null : name
  return cachedName
}
// Linked-list node recording the stack of open environment names. `hash`
// folds the name into the parent's hash so lezer can compare contexts
// cheaply by value.
function ElementContext(name, parent) {
  this.name = name
  this.parent = parent
  let h = parent ? parent.hash : 0
  for (let i = 0; i < name.length; i++) {
    const code = name.charCodeAt(i)
    h += (h << 4) + code + (code << 8)
  }
  this.hash = h
}
// Context tracker mirroring the stack of currently-open environments, so
// external tokenizers (notably verbatimTokenizer) can read the innermost
// environment's name via stack.context.name.
export const elementContext = new ContextTracker({
  start: null,
  shift(context, term, stack, input) {
    // On a Begin token, push the upcoming environment name (read just past
    // the '\begin' text) onto the context chain.
    return term === Begin
      ? new ElementContext(envNameAfter(input, '\\begin'.length) || '', context)
      : context
  },
  reduce(context, term) {
    // Pop when a KnownEnvironment node completes.
    // NOTE(review): generic `Environment` nodes are not popped here — confirm
    // whether that imbalance is intentional (it only matters for hashing and
    // verbatim-name lookup).
    return term === KnownEnvironment && context ? context.parent : context
  },
  reuse(context, node, _stack, input) {
    // When a cached Begin node is reused, the input is presumably already
    // positioned at the '{', hence offset 0 — TODO confirm.
    const type = node.type.id
    return type === Begin
      ? new ElementContext(envNameAfter(input, 0) || '', context)
      : context
  },
  hash(context) {
    return context ? context.hash : 0
  },
  strict: false,
})
// tokenizer for \verb|...| commands: the character after \verb (or \verb*)
// is the delimiter, and everything up to the next occurrence of that
// delimiter on the same line is verbatim content.
export const verbTokenizer = new ExternalTokenizer(
  (input, stack) => {
    // \verb* is the space-marking variant; skip the star.
    if (input.next === '*'.charCodeAt(0)) input.advance()
    const delimiter = input.next
    if (delimiter === -1) return // hit end of file
    if (/\s|\*/.test(String.fromCharCode(delimiter))) return // invalid delimiter
    input.advance()
    for (;;) {
      const next = input.next
      // \verb content may not span lines; emit no token on EOF/newline.
      if (next === -1 || next === CHAR_NEWLINE) return
      input.advance()
      // note: the closing delimiter is consumed as part of the token
      if (next === delimiter) break
    }
    return input.acceptToken(VerbContent)
  },
  { contextual: false }
)
// tokenizer for \lstinline|...| commands: like \verb the first character is
// the delimiter, except that an opening '{' is closed by '}'.
export const lstinlineTokenizer = new ExternalTokenizer(
  (input, stack) => {
    let delimiter = input.next
    if (delimiter === -1) return // hit end of file
    if (/\s/.test(String.fromCharCode(delimiter))) {
      return // invalid delimiter
    }
    // Brace-delimited form: \lstinline{...} terminates at '}'.
    if (delimiter === CHAR_OPEN_BRACE) {
      delimiter = CHAR_CLOSE_BRACE
    }
    input.advance()
    for (;;) {
      const next = input.next
      // Inline listings may not span lines; emit no token on EOF/newline.
      if (next === -1 || next === CHAR_NEWLINE) return
      input.advance()
      // note: the closing delimiter is consumed as part of the token
      if (next === delimiter) break
    }
    return input.acceptToken(LstInlineContent)
  },
  { contextual: false }
)
// True when the input characters starting at `offset` spell out `expected`.
// Does not advance the input; only peeks.
const matchForward = (input, expected, offset = 0) => {
  let i = 0
  while (i < expected.length) {
    if (String.fromCharCode(input.peek(offset + i)) !== expected[i]) {
      return false
    }
    i += 1
  }
  return true
}
// tokenizer for \begin{verbatim}...\end{verbatim} environments: consumes
// everything up to the matching \end{<name>}, where <name> is the innermost
// environment name recorded by the context tracker.
export const verbatimTokenizer = new ExternalTokenizer(
  (input, stack) => {
    const delimiter = '\\end{' + stack.context.name + '}'
    for (let offset = 0; ; offset++) {
      const next = input.peek(offset)
      // Stop at EOF or just before the closing \end{...}; the \end itself is
      // left for the grammar to parse as the EndEnv rule.
      if (next === -1 || matchForward(input, delimiter, offset)) {
        return input.acceptToken(VerbatimContent, offset)
      }
    }
  },
  { contextual: false }
)
// tokenizer for \href{...} and similar commands: consume everything up to
// (but not including) the closing brace as literal text, so characters like
// % and \ inside URLs are not given their usual meaning.
export const literalArgTokenizer = new ExternalTokenizer(
  input => {
    for (let offset = 0; ; offset++) {
      const next = input.peek(offset)
      if (next === -1 || next === CHAR_CLOSE_BRACE) {
        return input.acceptToken(LiteralArgContent, offset)
      }
    }
  },
  { contextual: false }
)
// tokenizer for literal content delimited by whitespace, such as in
// `\input foo.tex`: consumes up to the next space, newline, or EOF.
// NOTE(review): a tab does not terminate the token here — confirm intended.
export const spaceDelimitedLiteralArgTokenizer = new ExternalTokenizer(
  input => {
    for (let offset = 0; ; offset++) {
      const next = input.peek(offset)
      if (next === -1 || next === CHAR_SPACE || next === CHAR_NEWLINE) {
        return input.acceptToken(SpaceDelimitedLiteralArgContent, offset)
      }
    }
  },
  { contextual: false }
)
// Look up the UTF-16 code unit of a one-character string.
const _char = (s) => s.charCodeAt(0)

// Character codes used throughout the tokenizers below.
const CHAR_BACKSLASH = _char('\\')
const CHAR_OPEN_BRACE = _char('{')
const CHAR_OPEN_BRACKET = _char('[')
const CHAR_CLOSE_BRACE = _char('}')
const CHAR_TAB = _char('\t')
const CHAR_SPACE = _char(' ')
const CHAR_NEWLINE = _char('\n')
// Builds a fallback tokenizer that scans ahead over spaces/tabs (bounded by
// MAX_ARGUMENT_LOOKAHEAD) and emits whichever marker token `getToken`
// chooses for the first non-blank character. Nothing is advanced, so the
// accepted token consumes no input — it acts as a zero-width marker.
const lookaheadTokenizer = getToken =>
  new ExternalTokenizer(
    input => {
      for (let i = 0; i < MAX_ARGUMENT_LOOKAHEAD; ++i) {
        const next = input.peek(i)
        if (next === CHAR_SPACE || next === CHAR_TAB) {
          continue
        }
        const token = getToken(next)
        if (token) {
          input.acceptToken(token)
          return
        }
        // NOTE(review): both current getToken callbacks always return a
        // token, so in practice the loop ends on the first non-blank char.
      }
    },
    { contextual: false, fallback: true }
  )
// Marker tokens telling the unknown-command rules whether another {...}
// argument follows the current position.
export const argumentListTokenizer = lookaheadTokenizer(next =>
  next === CHAR_OPEN_BRACE ? hasMoreArguments : endOfArguments
)

// As above, but [...] optional arguments also count as "more arguments".
export const argumentListWithOptionalTokenizer = lookaheadTokenizer(next =>
  next === CHAR_OPEN_BRACE || next === CHAR_OPEN_BRACKET
    ? hasMoreArgumentsOrOptionals
    : endOfArgumentsAndOptionals
)
const CHAR_AT_SYMBOL = _char('@')
// Tokenizer for control sequence names that may contain '@' (common in TeX
// package/class definitions), i.e. \\[a-zA-Z@]+, emitted as a Csname token.
export const csnameTokenizer = new ExternalTokenizer((input, stack) => {
  let offset = 0
  let end = -1
  // look at the first character, we are looking for acceptable control sequence names
  // including @ signs, \\[a-zA-Z@]+
  const next = input.peek(offset)
  if (next === -1) {
    return
  }
  // reject anything not starting with a backslash,
  // we only accept control sequences
  if (next !== CHAR_BACKSLASH) {
    return
  }
  offset++
  for (;;) {
    const next = input.peek(offset)
    // stop when we reach the end of file or a non-csname character
    if (next === -1 || !(alphaChar(next) || next === CHAR_AT_SYMBOL)) {
      end = offset - 1
      break
    }
    end = offset
    offset++
  }
  // NOTE(review): the loop above always sets `end` to at least 0 (the
  // backslash itself when no name characters follow), so this guard is
  // unreachable and a lone backslash is accepted as a 1-character Csname —
  // confirm that is intended.
  if (end === -1) return
  // accept the content as a valid control sequence
  return input.acceptToken(Csname, end + 1)
})
// '\end{document}' reversed, for matching backwards from the current pos.
const END_DOCUMENT_MARK = '\\end{document}'.split('').reverse()
// Tokenizer for content after \end{document}: emits TrailingWhitespaceOnly
// when only blanks/newlines remain, otherwise consumes everything to EOF as
// a single TrailingContent token.
export const trailingContentTokenizer = new ExternalTokenizer(
  (input, stack) => {
    if (input.next === -1) return // no trailing content
    // Look back for end-document mark, bail out if any characters do not match
    for (let i = 1; i < END_DOCUMENT_MARK.length + 1; i++) {
      if (String.fromCharCode(input.peek(-i)) !== END_DOCUMENT_MARK[i - 1]) {
        return
      }
    }
    // Skip trailing blanks; if we hit EOF it was whitespace only.
    while (input.next === CHAR_SPACE || input.next === CHAR_NEWLINE) {
      const next = input.advance()
      if (next === -1) return input.acceptToken(TrailingWhitespaceOnly) // trailing whitespace only
    }
    // accept all content up to the end of the document
    while (input.advance() !== -1) {
      //
    }
    return input.acceptToken(TrailingContent)
  }
)
// \ref-like commands (varioref, cleveref, zref, fancyref, nameref, ...)
// whose argument is a reference label; specialized to RefCtrlSeq.
const refCommands = new Set([
  '\\fullref',
  '\\Vref',
  '\\autopageref',
  '\\autoref',
  '\\eqref',
  '\\labelcpageref',
  '\\labelcref',
  '\\lcnamecref',
  '\\lcnamecrefs',
  '\\namecref',
  '\\nameCref',
  '\\namecrefs',
  '\\nameCrefs',
  '\\thnameref',
  '\\thref',
  '\\titleref',
  '\\vrefrange',
  '\\Crefrange',
  // NOTE(review): '\\Crefrang' looks like a typo for '\\Crefrange' (listed
  // just above) — confirm against the cleveref package.
  '\\Crefrang',
  '\\fref',
  '\\pref',
  '\\tref',
  '\\Aref',
  '\\Bref',
  '\\Pref',
  '\\Sref',
  // NOTE(review): '\\vref' also appears in refStarrableCommands below; one
  // of the two entries is redundant depending on lookup order — confirm.
  '\\vref',
  '\\nameref',
])
// \ref-like commands that additionally have a starred variant (\cref* etc.);
// specialized to RefStarrableCtrlSeq.
const refStarrableCommands = new Set([
  '\\vpageref',
  '\\vref',
  '\\zcpageref',
  '\\zcref',
  '\\zfullref',
  '\\zref',
  '\\zvpageref',
  '\\zvref',
  '\\cref',
  '\\Cref',
  '\\pageref',
  '\\ref',
  '\\Ref',
  '\\subref',
  '\\zpageref',
  '\\ztitleref',
  '\\vpagerefrange',
  '\\zvpagerefrange',
  '\\zvrefrange',
  '\\crefrange',
])
const citeCommands = new Set([
'\\autocites',
'\\Autocites',
'\\Cite',
'\\citeA',
'\\citealp',
'\\Citealp',
'\\citealt',
'\\Citealt',
'\\citeauthorNP',
'\\citeauthorp',
'\\Citeauthorp',
'\\citeauthort',
'\\Citeauthort',
'\\citeNP',
'\\citenum',
'\\citen',
'\\citeonline',
'\\cites',
'\\Cites',
'\\citeurl',
'\\citeyearpar',
'\\defcitealias',
'\\fnotecite',
'\\footcite',
'\\footcitetext',
'\\footfullcite',
'\\footnotecites',
'\\Footnotecites',
'\\fullcite',
'\\fullciteA',
'\\fullciteauthor',
'\\fullciteauthorNP',
'\\maskcite',
'\\maskciteA',
'\\maskcitealp',
'\\maskCitealp',
'\\maskcitealt',
'\\maskCitealt',
'\\maskciteauthor',
'\\maskciteauthorNP',
'\\maskciteauthorp',
'\\maskCiteauthorp',
'\\maskciteauthort',
'\\maskCiteauthort',
'\\maskciteNP',
'\\maskcitenum',
'\\maskcitep',
'\\maskCitep',
'\\maskcitepalias',
'\\maskcitet',
'\\maskCitet',
'\\maskcitetalias',
'\\maskciteyear',
'\\maskciteyearNP',
'\\maskciteyearpar',
'\\maskfullcite',
'\\maskfullciteA',
'\\maskfullciteauthor',
'\\maskfullciteauthorNP',
'\\masknocite',
'\\maskshortcite',
'\\maskshortciteA',
'\\maskshortciteauthor',
'\\maskshortciteauthorNP',
'\\maskshortciteNP',
'\\mautocite',
'\\Mautocite',
'\\mcite',
'\\Mcite',
'\\mfootcite',
'\\mfootcitetext',
'\\mparencite',
'\\Mparencite',
'\\msupercite',
'\\mtextcite',
'\\Mtextcite',
'\\nocite',
'\\nocitemeta',
'\\notecite',
'\\Parencite',
'\\parencites',
'\\Parencites',
'\\pnotecite',
'\\shortcite',
'\\shortciteA',
'\\shortciteauthor',
'\\shortciteauthorNP',
'\\shortciteNP',
'\\smartcite',
'\\Smartcite',
'\\smartcites',
'\\Smartcites',
'\\supercite',
'\\supercites',
'\\textcite',
'\\Textcite',
'\\textcites',
'\\Textcites',
])
// Citation commands that also accept a starred variant, e.g. \cite*{...}.
// Insertion order is preserved from the original hand-curated list.
const citeStarredCommands = new Set([
  '\\cite', '\\citeauthor', '\\Citeauthor', '\\citedate',
  '\\citep', '\\citepalias', '\\Citep', '\\citetitle',
  '\\citeyear', '\\parencite', '\\citet', '\\citetalias',
  '\\autocite', '\\Autocite',
])
const labelCommands = new Set(['\\label', '\\thlabel', '\\zlabel'])
const mathTextCommands = new Set(['\\text', '\\tag', '\\textrm', '\\intertext'])
// Lookup table mapping other known control sequences to their specialized
// token ids (term constants generated by @lezer/generator from the grammar).
// Consulted by specializeCtrlSeq after the Set-based categories above.
//
// NOTE(review): '\\addseq' looks like a typo for KOMA-Script's \addsec (its
// siblings \addpart and \addchap are mapped correctly above). '\\addsec' is
// added here; '\\addseq' is kept so existing behavior is unchanged.
const otherKnowncommands = {
  '\\hbox': HboxCtrlSeq,
  '\\title': TitleCtrlSeq,
  '\\author': AuthorCtrlSeq,
  '\\affil': AffilCtrlSeq,
  '\\affiliation': AffiliationCtrlSeq,
  '\\date': DateCtrlSeq,
  '\\documentclass': DocumentClassCtrlSeq,
  '\\usepackage': UsePackageCtrlSeq,
  '\\href': HrefCtrlSeq,
  '\\url': UrlCtrlSeq,
  '\\verb': VerbCtrlSeq,
  '\\lstinline': LstInlineCtrlSeq,
  '\\includegraphics': IncludeGraphicsCtrlSeq,
  '\\caption': CaptionCtrlSeq,
  '\\def': DefCtrlSeq,
  '\\let': LetCtrlSeq,
  '\\left': LeftCtrlSeq,
  '\\right': RightCtrlSeq,
  '\\newcommand': NewCommandCtrlSeq,
  '\\renewcommand': RenewCommandCtrlSeq,
  '\\newenvironment': NewEnvironmentCtrlSeq,
  '\\renewenvironment': RenewEnvironmentCtrlSeq,
  '\\book': BookCtrlSeq,
  '\\part': PartCtrlSeq,
  '\\addpart': PartCtrlSeq, // KOMA-Script unnumbered part
  '\\chapter': ChapterCtrlSeq,
  '\\addchap': ChapterCtrlSeq, // KOMA-Script unnumbered chapter
  '\\section': SectionCtrlSeq,
  '\\addsec': SectionCtrlSeq, // KOMA-Script unnumbered section
  '\\addseq': SectionCtrlSeq, // kept for backward compatibility (likely typo for \addsec)
  '\\subsection': SubSectionCtrlSeq,
  '\\subsubsection': SubSubSectionCtrlSeq,
  '\\paragraph': ParagraphCtrlSeq,
  '\\subparagraph': SubParagraphCtrlSeq,
  '\\input': InputCtrlSeq,
  '\\include': IncludeCtrlSeq,
  '\\item': ItemCtrlSeq,
  '\\centering': CenteringCtrlSeq,
  '\\newtheorem': NewTheoremCtrlSeq,
  '\\theoremstyle': TheoremStyleCtrlSeq,
  '\\bibliography': BibliographyCtrlSeq,
  '\\bibliographystyle': BibliographyStyleCtrlSeq,
  '\\maketitle': MaketitleCtrlSeq,
  '\\textcolor': TextColorCtrlSeq,
  '\\colorbox': ColorBoxCtrlSeq,
  '\\hline': HLineCtrlSeq,
  '\\toprule': TopRuleCtrlSeq,
  '\\midrule': MidRuleCtrlSeq,
  '\\bottomrule': BottomRuleCtrlSeq,
  '\\multicolumn': MultiColumnCtrlSeq,
  '\\parbox': ParBoxCtrlSeq,
  '\\textbf': TextBoldCtrlSeq,
  '\\textit': TextItalicCtrlSeq,
  '\\textsc': TextSmallCapsCtrlSeq,
  '\\texttt': TextTeletypeCtrlSeq,
  '\\textmd': TextMediumCtrlSeq,
  '\\textsf': TextSansSerifCtrlSeq,
  '\\textsuperscript': TextSuperscriptCtrlSeq,
  '\\textsubscript': TextSubscriptCtrlSeq,
  '\\sout': TextStrikeOutCtrlSeq,
  '\\emph': EmphasisCtrlSeq,
  '\\underline': UnderlineCtrlSeq,
  '\\setlength': SetLengthCtrlSeq,
}
// Specializer for control sequences: maps a control-sequence name to a
// dedicated token id, or -1 to leave the generic token unchanged.
// Category checks run in a fixed order; the final fallback consults the
// otherKnowncommands table.
export const specializeCtrlSeq = (name, terms) => {
  // environment delimiters always get their own tokens
  if (name === '\\begin') return Begin
  if (name === '\\end') return End
  // cross-referencing commands (plain and starrable variants)
  if (refCommands.has(name)) return RefCtrlSeq
  if (refStarrableCommands.has(name)) return RefStarrableCtrlSeq
  // citation commands (plain and starrable variants)
  if (citeCommands.has(name)) return CiteCtrlSeq
  if (citeStarredCommands.has(name)) return CiteStarrableCtrlSeq
  // label definitions and text-mode-in-math commands
  if (labelCommands.has(name)) return LabelCtrlSeq
  if (mathTextCommands.has(name)) return MathTextCtrlSeq
  // fall back to the lookup table; -1 means "no specialization"
  return otherKnowncommands[name] || -1
}
// Environments parsed with tabular (column-specifier) content.
const tabularEnvNames = new Set(
  ['tabular', 'xltabular', 'tabularx', 'longtable']
)
// Environments whose body is a single (non-aligned) math formula.
// Insertion order is preserved from the original list.
const equationEnvNames = new Set([
  'equation', 'equation*', 'displaymath', 'displaymath*',
  'math', 'math*', 'multline', 'multline*',
  'matrix', 'tikzcd',
])
// Math environments with alignment structure (&-separated columns and \\
// row breaks): eqnarray/amsmath alignment forms, matrix variants, cases,
// and IEEEeqnarray. Insertion order is preserved from the original list.
const equationArrayEnvNames = new Set([
  'array', 'eqnarray', 'eqnarray*',
  'align', 'align*', 'alignat', 'alignat*',
  'flalign', 'flalign*', 'gather', 'gather*',
  'pmatrix', 'pmatrix*', 'bmatrix', 'bmatrix*',
  'Bmatrix', 'Bmatrix*', 'vmatrix', 'vmatrix*',
  'Vmatrix', 'Vmatrix*', 'smallmatrix', 'smallmatrix*',
  'split', 'split*', 'gathered', 'gathered*',
  'aligned', 'aligned*', 'alignedat', 'alignedat*',
  'cases', 'cases*', 'dcases', 'dcases*',
  'rcases', 'rcases*', 'IEEEeqnarray', 'IEEEeqnarray*',
])
// Environments whose body is scanned verbatim — no LaTeX tokenization is
// performed inside them (verbatim/fancyvrb/listings/minted/tcolorbox and
// the comment environment).
// Fix: the original list contained 'lstlisting' twice; the duplicate entry
// was redundant (Set deduplicates) and has been removed.
const verbatimEnvNames = new Set([
  'verbatim',
  'boxedverbatim',
  'lstlisting',
  'minted',
  'Verbatim',
  'tcblisting',
  'codeexample',
  'comment',
])
// Lookup table mapping other known environment names (as written inside
// \begin{...}) to their specialized token ids. Starred/sub-variants of
// figure share the FigureEnvName token; enumerate/itemize/description all
// share ListEnvName. Consulted as the fallback in specializeEnvName.
const otherKnownEnvNames = {
  document: DocumentEnvName,
  tikzpicture: TikzPictureEnvName,
  figure: FigureEnvName,
  'figure*': FigureEnvName,
  subfigure: FigureEnvName,
  enumerate: ListEnvName,
  itemize: ListEnvName,
  table: TableEnvName,
  description: ListEnvName,
}
// Specializer for environment names: maps the name found in \begin{...}
// to a dedicated token id, or -1 to leave the generic token unchanged.
// Category checks run in a fixed order; the final fallback consults the
// otherKnownEnvNames table.
export const specializeEnvName = (name, terms) => {
  if (tabularEnvNames.has(name)) return TabularEnvName
  if (equationEnvNames.has(name)) return EquationEnvName
  if (equationArrayEnvNames.has(name)) return EquationArrayEnvName
  if (verbatimEnvNames.has(name)) return VerbatimEnvName
  // -1 means "no specialization"
  return otherKnownEnvNames[name] || -1
}
// Lookup table mapping known control symbols (a backslash followed by a
// single non-letter) to their specialized token ids: the \( \) and \[ \]
// math-mode delimiters and the \\ line break.
const otherKnownCtrlSyms = {
  '\\(': OpenParenCtrlSym,
  '\\)': CloseParenCtrlSym,
  '\\[': OpenBracketCtrlSym,
  '\\]': CloseBracketCtrlSym,
  '\\\\': LineBreakCtrlSym,
}
// Specializer for control symbols: maps a control symbol to a dedicated
// token id via the otherKnownCtrlSyms table, or -1 ("no specialization").
export const specializeCtrlSym = (name, terms) => {
  const term = otherKnownCtrlSyms[name]
  return term || -1
}