first commit
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
# Lezer-LaTeX, a LaTeX Parser
|
||||
|
||||
Lezer-LaTeX is a LaTeX parser implemented with [lezer](https://lezer.codemirror.net/), the parser system used by [CodeMirror 6](https://codemirror.net/6/).
|
||||
|
||||
The parser is written in a "grammar" file (plus a "tokens" file containing custom tokenizer logic), which is compiled by `@lezer/generator` into a parser module and a "terms" module. The parser module is then loaded by CodeMirror 6 in the web frontend codebase.
|
||||
|
||||
|
||||
## Important files
|
||||
|
||||
- Source files:
|
||||
- `./latex.grammar`: The grammar file, containing the specification for the parser
|
||||
- `./tokens.mjs`: The custom tokenizer logic, required by some rules in the grammar
|
||||
|
||||
- Generated files:
|
||||
- `./latex.mjs`: The generated parser
|
||||
- `./latex.terms.mjs`: The generated terms file
|
||||
- (these files are ignored by git, eslint, and prettier)
|
||||
|
||||
- Scripts:
|
||||
- `web/scripts/lezer-latex/generate.js`: A script which runs the generator on the grammar, producing the generated parser/terms files
|
||||
- `web/scripts/lezer-latex/run.mjs`: A script that runs the parser against a supplied file, and prints the tree to the terminal
|
||||
|
||||
- Webpack plugins:
|
||||
- `web/webpack-plugins/lezer-grammar-compiler.js`: A webpack plugin that calls the generator as part of the webpack build. In dev, it will automatically re-build the parser when the grammar file changes.
|
||||
|
||||
|
||||
## NPM tasks
|
||||
|
||||
- `lezer-latex:generate`: Generate the parser files from the grammar
|
||||
- (Calls `lezer-latex/generate.js`)
|
||||
- This should be run whenever the grammar changes
|
||||
|
||||
- `lezer-latex:run`: Run the parser against a file
|
||||
- (Calls `lezer-latex/run.mjs`)
|
||||
|
||||
|
||||
### Generating the parser
|
||||
|
||||
From the monorepo root:
|
||||
|
||||
``` sh
|
||||
bin/npm -w services/web run 'lezer-latex:generate'
|
||||
```
|
||||
|
||||
|
||||
## Tests
|
||||
|
||||
Unit tests for the parser live in `web/test/unit/src/LezerLatex`. There are three kinds of test, in three subdirectories:
|
||||
|
||||
- `corpus/`: A set of tests using lezer's test framework, consisting of example text and the expected parse tree
|
||||
- `examples/`: A set of realistic LaTeX documents. These tests pass if the files parse with no errors
|
||||
- `regressions/`: Like `examples/`, these are expected to parse without error, but they are not realistic documents.
|
||||
|
||||
These tests run as part of `test_frontend`. You can run these tests alone by invoking:
|
||||
|
||||
``` sh
|
||||
make test_unit MOCHA_GREP='lezer-latex'
|
||||
```
|
||||
|
||||
|
||||
## Trying the parser
|
||||
|
||||
While developing the parser, you can run it against a file by calling the `lezer-latex:run` task. There are
|
||||
some example files in the test suite, at `web/test/unit/src/LezerLatex/examples/`.
|
||||
|
||||
For example:
|
||||
|
||||
``` sh
|
||||
bin/npm -w services/web run 'lezer-latex:run' web/test/unit/src/LezerLatex/examples/amsmath.tex
|
||||
```
|
||||
|
||||
If you omit the file path, the default file (`examples/demo.tex`) will be run.
|
||||
|
||||
|
||||
## Integration into web
|
||||
|
||||
The web frontend imports the parser (from `latex.mjs`), in `frontend/js/features/source-editor/languages/latex/index.ts`.
|
||||
The parser is then plugged in to the CM6 language system.
|
||||
|
||||
### The web build
|
||||
|
||||
In `web/Dockerfile`, we have a `RUN` command that calls `lezer-latex:generate` as part of the build. This is necessary to ensure the parser is built before the CI tests run (notably: we can't do the build during the tests, because we can't write to disk during that stage of CI).
|
||||
@@ -0,0 +1,829 @@
|
||||
// Track environments
|
||||
|
||||
@context elementContext from "./tokens.mjs"
|
||||
|
||||
// External tokens must be defined before normal @tokens to take precedence
|
||||
// over them.
|
||||
|
||||
@external tokens verbTokenizer from "./tokens.mjs" {
|
||||
VerbContent
|
||||
}
|
||||
|
||||
@external tokens lstinlineTokenizer from "./tokens.mjs" {
|
||||
LstInlineContent
|
||||
}
|
||||
|
||||
@external tokens literalArgTokenizer from "./tokens.mjs" {
|
||||
LiteralArgContent
|
||||
}
|
||||
|
||||
@external tokens spaceDelimitedLiteralArgTokenizer from "./tokens.mjs" {
|
||||
SpaceDelimitedLiteralArgContent
|
||||
}
|
||||
|
||||
@external tokens verbatimTokenizer from "./tokens.mjs" {
|
||||
VerbatimContent
|
||||
}
|
||||
|
||||
// external tokenizer to read control sequence names including @ signs
|
||||
// (which are often used in TeX definitions).
|
||||
@external tokens csnameTokenizer from "./tokens.mjs" {
|
||||
Csname
|
||||
}
|
||||
|
||||
@external tokens trailingContentTokenizer from "./tokens.mjs" {
|
||||
TrailingWhitespaceOnly,
|
||||
TrailingContent
|
||||
}
|
||||
|
||||
// It doesn't seem to be possible to access specialized tokens in the context tracker.
|
||||
// They have ids which are not exported in the latex.terms.mjs file.
|
||||
// This is a workaround: use an external specializer to explicitly choose the terms
|
||||
// to use for the specialized tokens.
|
||||
@external specialize {CtrlSeq} specializeCtrlSeq from "./tokens.mjs" {
|
||||
Begin,
|
||||
End,
|
||||
RefCtrlSeq,
|
||||
RefStarrableCtrlSeq,
|
||||
CiteCtrlSeq,
|
||||
CiteStarrableCtrlSeq,
|
||||
LabelCtrlSeq,
|
||||
MathTextCtrlSeq,
|
||||
HboxCtrlSeq,
|
||||
TitleCtrlSeq,
|
||||
DocumentClassCtrlSeq,
|
||||
UsePackageCtrlSeq,
|
||||
HrefCtrlSeq,
|
||||
UrlCtrlSeq,
|
||||
VerbCtrlSeq,
|
||||
LstInlineCtrlSeq,
|
||||
IncludeGraphicsCtrlSeq,
|
||||
CaptionCtrlSeq,
|
||||
DefCtrlSeq,
|
||||
LetCtrlSeq,
|
||||
LeftCtrlSeq,
|
||||
RightCtrlSeq,
|
||||
NewCommandCtrlSeq,
|
||||
RenewCommandCtrlSeq,
|
||||
NewEnvironmentCtrlSeq,
|
||||
RenewEnvironmentCtrlSeq,
|
||||
// services/web/frontend/js/features/outline/outline-parser.js
|
||||
BookCtrlSeq,
|
||||
PartCtrlSeq,
|
||||
ChapterCtrlSeq,
|
||||
SectionCtrlSeq,
|
||||
SubSectionCtrlSeq,
|
||||
SubSubSectionCtrlSeq,
|
||||
ParagraphCtrlSeq,
|
||||
SubParagraphCtrlSeq,
|
||||
InputCtrlSeq,
|
||||
IncludeCtrlSeq,
|
||||
ItemCtrlSeq,
|
||||
NewTheoremCtrlSeq,
|
||||
TheoremStyleCtrlSeq,
|
||||
CenteringCtrlSeq,
|
||||
BibliographyCtrlSeq,
|
||||
BibliographyStyleCtrlSeq,
|
||||
AuthorCtrlSeq,
|
||||
AffilCtrlSeq,
|
||||
AffiliationCtrlSeq,
|
||||
DateCtrlSeq,
|
||||
MaketitleCtrlSeq,
|
||||
TextColorCtrlSeq,
|
||||
ColorBoxCtrlSeq,
|
||||
HLineCtrlSeq,
|
||||
TopRuleCtrlSeq,
|
||||
MidRuleCtrlSeq,
|
||||
BottomRuleCtrlSeq,
|
||||
MultiColumnCtrlSeq,
|
||||
ParBoxCtrlSeq,
|
||||
TextBoldCtrlSeq,
|
||||
TextItalicCtrlSeq,
|
||||
TextSmallCapsCtrlSeq,
|
||||
TextTeletypeCtrlSeq,
|
||||
TextMediumCtrlSeq,
|
||||
TextSansSerifCtrlSeq,
|
||||
TextSuperscriptCtrlSeq,
|
||||
TextSubscriptCtrlSeq,
|
||||
TextStrikeOutCtrlSeq,
|
||||
EmphasisCtrlSeq,
|
||||
UnderlineCtrlSeq,
|
||||
SetLengthCtrlSeq
|
||||
}
|
||||
|
||||
@external specialize {EnvName} specializeEnvName from "./tokens.mjs" {
|
||||
DocumentEnvName,
|
||||
TabularEnvName,
|
||||
EquationEnvName,
|
||||
EquationArrayEnvName,
|
||||
VerbatimEnvName,
|
||||
TikzPictureEnvName,
|
||||
FigureEnvName,
|
||||
ListEnvName,
|
||||
TableEnvName
|
||||
}
|
||||
|
||||
@external specialize {CtrlSym} specializeCtrlSym from "./tokens.mjs" {
|
||||
OpenParenCtrlSym,
|
||||
CloseParenCtrlSym,
|
||||
OpenBracketCtrlSym,
|
||||
CloseBracketCtrlSym,
|
||||
LineBreakCtrlSym
|
||||
}
|
||||
|
||||
@tokens {
|
||||
CtrlSeq { "\\" $[a-zA-Z]+ }
|
||||
CtrlSym { "\\" ![a-zA-Z] }
|
||||
|
||||
// tokens for paragraphs
|
||||
Whitespace { $[ \t]+ }
|
||||
NewLine { "\n" }
|
||||
BlankLine { "\n" "\n"+ }
|
||||
Normal { ![\\{}\[\]$&~#^_% \t\n] ![\\{}\[\]$&~#^_%\t\n]* } // everything is normal text, except these characters
|
||||
@precedence { CtrlSeq, CtrlSym, BlankLine, NewLine, Whitespace, Normal }
|
||||
|
||||
OpenBrace[closedBy=CloseBrace] { "{" }
|
||||
CloseBrace[openedBy=OpenBrace] { "}" }
|
||||
OpenBracket[closedBy=CloseBracket] { "[" }
|
||||
CloseBracket[openedBy=OpenBracket] { "]" }
|
||||
|
||||
Comment { "%" ![\n]* "\n"? }
|
||||
|
||||
Dollar { "$" }
|
||||
|
||||
Number { $[0-9]+ ("." $[0-9]*)? }
|
||||
MathSpecialChar { $[^_=<>()\-+/*]+ } // FIXME not all of these are special
|
||||
MathChar { ![0-9^_=<>()\-+/*\\{}\[\]$%&~ \t\n]+ }
|
||||
|
||||
@precedence { Number, MathSpecialChar, MathChar }
|
||||
|
||||
Ampersand { "&" }
|
||||
Tilde { "~" }
|
||||
|
||||
EnvName { $[a-zA-Z]+ $[*]? }
|
||||
}
|
||||
|
||||
@top LaTeX {
|
||||
Text
|
||||
}
|
||||
|
||||
@skip { Comment }
|
||||
|
||||
// TEXT MODE
|
||||
|
||||
optionalWhitespace {
|
||||
!argument Whitespace
|
||||
}
|
||||
|
||||
OptionalArgument {
|
||||
!argument OpenBracket ShortOptionalArg CloseBracket
|
||||
}
|
||||
|
||||
TextArgument {
|
||||
!argument OpenBrace LongArg CloseBrace
|
||||
}
|
||||
|
||||
SectioningArgument {
|
||||
!argument OpenBrace LongArg CloseBrace
|
||||
}
|
||||
|
||||
LabelArgument {
|
||||
!argument ShortTextArgument
|
||||
}
|
||||
|
||||
RefArgument {
|
||||
!argument ShortTextArgument
|
||||
}
|
||||
|
||||
BibKeyArgument {
|
||||
!argument ShortTextArgument
|
||||
}
|
||||
|
||||
PackageArgument {
|
||||
!argument ShortTextArgument
|
||||
}
|
||||
|
||||
TabularArgument {
|
||||
!argument OpenBrace TabularContent CloseBrace
|
||||
}
|
||||
|
||||
UrlArgument {
|
||||
OpenBrace LiteralArgContent CloseBrace
|
||||
}
|
||||
|
||||
FilePathArgument {
|
||||
OpenBrace LiteralArgContent CloseBrace
|
||||
}
|
||||
|
||||
BareFilePathArgument {
|
||||
Whitespace SpaceDelimitedLiteralArgContent
|
||||
}
|
||||
|
||||
DefinitionArgument {
|
||||
!argument NewLine? Whitespace* OpenBrace DefinitionFragment? CloseBrace
|
||||
}
|
||||
|
||||
MacroParameter {
|
||||
"#" ("1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9")
|
||||
}
|
||||
|
||||
OptionalMacroParameter {
|
||||
OpenBracket MacroParameter CloseBracket
|
||||
}
|
||||
|
||||
// The autocompletion code in services/web/frontend/js/features/source-editor/utils/tree-operations/commands.ts
|
||||
// depends on following the `KnownCommand { Command { CommandCtrlSeq [args] } }`
|
||||
// structure
|
||||
KnownCommand<ArgumentType> {
|
||||
Title {
|
||||
TitleCtrlSeq optionalWhitespace? OptionalArgument? TextArgument
|
||||
} |
|
||||
Author {
|
||||
AuthorCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
|
||||
} |
|
||||
Affil {
|
||||
AffilCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
|
||||
} |
|
||||
Affiliation {
|
||||
AffiliationCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
|
||||
} |
|
||||
Date {
|
||||
DateCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? ShortTextArgument
|
||||
} |
|
||||
DocumentClass {
|
||||
DocumentClassCtrlSeq optionalWhitespace? OptionalArgument?
|
||||
DocumentClassArgument { ShortTextArgument }
|
||||
} |
|
||||
BibliographyCommand {
|
||||
BibliographyCtrlSeq optionalWhitespace?
|
||||
BibliographyArgument { ShortTextArgument }
|
||||
} |
|
||||
BibliographyStyleCommand {
|
||||
BibliographyStyleCtrlSeq optionalWhitespace?
|
||||
BibliographyStyleArgument { ShortTextArgument }
|
||||
} |
|
||||
UsePackage {
|
||||
UsePackageCtrlSeq optionalWhitespace? OptionalArgument?
|
||||
PackageArgument
|
||||
} |
|
||||
TextColorCommand {
|
||||
TextColorCtrlSeq optionalWhitespace? ShortTextArgument optionalWhitespace? ArgumentType
|
||||
} |
|
||||
ColorBoxCommand {
|
||||
ColorBoxCtrlSeq optionalWhitespace? ShortTextArgument optionalWhitespace? ArgumentType
|
||||
} |
|
||||
HrefCommand {
|
||||
HrefCtrlSeq optionalWhitespace? UrlArgument ShortTextArgument
|
||||
} |
|
||||
NewTheoremCommand {
|
||||
NewTheoremCtrlSeq "*"? optionalWhitespace? ShortTextArgument ((OptionalArgument? TextArgument) | (TextArgument OptionalArgument))
|
||||
} |
|
||||
TheoremStyleCommand {
|
||||
TheoremStyleCtrlSeq optionalWhitespace? ShortTextArgument
|
||||
} |
|
||||
UrlCommand {
|
||||
UrlCtrlSeq optionalWhitespace? UrlArgument
|
||||
} |
|
||||
VerbCommand {
|
||||
VerbCtrlSeq VerbContent
|
||||
} |
|
||||
LstInlineCommand {
|
||||
LstInlineCtrlSeq optionalWhitespace? OptionalArgument? LstInlineContent
|
||||
} |
|
||||
IncludeGraphics {
|
||||
IncludeGraphicsCtrlSeq optionalWhitespace? OptionalArgument?
|
||||
IncludeGraphicsArgument { FilePathArgument }
|
||||
} |
|
||||
Caption {
|
||||
CaptionCtrlSeq "*"? optionalWhitespace? OptionalArgument? TextArgument
|
||||
} |
|
||||
Label {
|
||||
LabelCtrlSeq optionalWhitespace? LabelArgument
|
||||
} |
|
||||
Ref {
|
||||
(RefCtrlSeq | RefStarrableCtrlSeq "*"?) optionalWhitespace? OptionalArgument? optionalWhitespace? OptionalArgument? optionalWhitespace? RefArgument
|
||||
} |
|
||||
Cite {
|
||||
(CiteCtrlSeq | CiteStarrableCtrlSeq "*"?) optionalWhitespace? OptionalArgument? optionalWhitespace? OptionalArgument? optionalWhitespace? BibKeyArgument
|
||||
} |
|
||||
Def {
|
||||
// allow more general Csname argument to \def commands, since other symbols such as '@' are often used in definitions
|
||||
DefCtrlSeq optionalWhitespace? (Csname | CtrlSym) optionalWhitespace? (MacroParameter | OptionalMacroParameter)* optionalWhitespace? DefinitionArgument
|
||||
} |
|
||||
Let {
|
||||
LetCtrlSeq Csname optionalWhitespace? "="? optionalWhitespace? Csname
|
||||
} |
|
||||
Hbox {
|
||||
HboxCtrlSeq optionalWhitespace? TextArgument
|
||||
} |
|
||||
NewCommand {
|
||||
NewCommandCtrlSeq optionalWhitespace?
|
||||
(Csname | OpenBrace LiteralArgContent CloseBrace)
|
||||
(OptionalArgument)*
|
||||
DefinitionArgument
|
||||
} |
|
||||
RenewCommand {
|
||||
RenewCommandCtrlSeq optionalWhitespace?
|
||||
(Csname | OpenBrace LiteralArgContent CloseBrace)
|
||||
(OptionalArgument)*
|
||||
DefinitionArgument
|
||||
} |
|
||||
NewEnvironment {
|
||||
NewEnvironmentCtrlSeq optionalWhitespace?
|
||||
(OpenBrace LiteralArgContent CloseBrace)
|
||||
(OptionalArgument)*
|
||||
DefinitionArgument
|
||||
DefinitionArgument
|
||||
} |
|
||||
RenewEnvironment {
|
||||
RenewEnvironmentCtrlSeq optionalWhitespace?
|
||||
(Csname | OpenBrace LiteralArgContent CloseBrace)
|
||||
(OptionalArgument)*
|
||||
DefinitionArgument
|
||||
DefinitionArgument
|
||||
} |
|
||||
Input {
|
||||
InputCtrlSeq InputArgument { ( FilePathArgument | BareFilePathArgument ) }
|
||||
} |
|
||||
Include {
|
||||
IncludeCtrlSeq IncludeArgument { FilePathArgument }
|
||||
} |
|
||||
Centering {
|
||||
CenteringCtrlSeq
|
||||
} |
|
||||
Item {
|
||||
ItemCtrlSeq OptionalArgument? optionalWhitespace?
|
||||
} |
|
||||
Maketitle {
|
||||
MaketitleCtrlSeq optionalWhitespace?
|
||||
} |
|
||||
HorizontalLine {
|
||||
(HLineCtrlSeq | TopRuleCtrlSeq | MidRuleCtrlSeq | BottomRuleCtrlSeq) optionalWhitespace?
|
||||
} |
|
||||
MultiColumn {
|
||||
MultiColumnCtrlSeq
|
||||
optionalWhitespace? SpanArgument { ShortTextArgument }
|
||||
optionalWhitespace? ColumnArgument { ShortTextArgument }
|
||||
optionalWhitespace? TabularArgument
|
||||
} |
|
||||
MathTextCommand {
|
||||
MathTextCtrlSeq optionalWhitespace? "*"? TextArgument
|
||||
} |
|
||||
ParBoxCommand {
|
||||
ParBoxCtrlSeq
|
||||
(optionalWhitespace? OptionalArgument)*
|
||||
ShortTextArgument
|
||||
optionalWhitespace? TextArgument
|
||||
} |
|
||||
TextBoldCommand {
|
||||
TextBoldCtrlSeq TextArgument
|
||||
} |
|
||||
TextItalicCommand {
|
||||
TextItalicCtrlSeq TextArgument
|
||||
} |
|
||||
TextSmallCapsCommand {
|
||||
TextSmallCapsCtrlSeq TextArgument
|
||||
} |
|
||||
TextTeletypeCommand {
|
||||
TextTeletypeCtrlSeq TextArgument
|
||||
} |
|
||||
TextMediumCommand {
|
||||
TextMediumCtrlSeq TextArgument
|
||||
} |
|
||||
TextSansSerifCommand {
|
||||
TextSansSerifCtrlSeq TextArgument
|
||||
} |
|
||||
TextSuperscriptCommand {
|
||||
TextSuperscriptCtrlSeq TextArgument
|
||||
} |
|
||||
TextSubscriptCommand {
|
||||
TextSubscriptCtrlSeq TextArgument
|
||||
} |
|
||||
StrikeOutCommand {
|
||||
TextStrikeOutCtrlSeq ArgumentType
|
||||
} |
|
||||
EmphasisCommand {
|
||||
EmphasisCtrlSeq ArgumentType
|
||||
} |
|
||||
UnderlineCommand {
|
||||
UnderlineCtrlSeq ArgumentType
|
||||
} |
|
||||
SetLengthCommand {
|
||||
SetLengthCtrlSeq optionalWhitespace? ShortTextArgument optionalWhitespace? ShortTextArgument
|
||||
}
|
||||
}
|
||||
|
||||
// A command built from the generic CtrlSeq / CtrlSym tokens (presumably any
// command that specializeCtrlSeq / specializeCtrlSym did not turn into a
// dedicated term -- confirm in tokens.mjs). Alternatives, in order:
//   1. CtrlSeq, whitespace, then one or more optional/text arguments
//   2. CtrlSeq directly followed by one or more optional/text arguments
//   3. CtrlSeq on its own, optionally followed by whitespace
//   4. a bare control symbol
UnknownCommand {
|
||||
(CtrlSeq !argument Whitespace (OptionalArgument | TextArgument)+)
|
||||
| (CtrlSeq (OptionalArgument | TextArgument)+)
|
||||
| CtrlSeq Whitespace?
|
||||
| CtrlSym
|
||||
}
|
||||
|
||||
Command {
|
||||
KnownCommand<TextArgument>
|
||||
| UnknownCommand
|
||||
| KnownCtrlSym
|
||||
// Not technically allowed in normal mode, but not worth failing the parse over
|
||||
| LeftCtrlSeq
|
||||
| RightCtrlSeq
|
||||
|
||||
}
|
||||
|
||||
KnownCtrlSym {
|
||||
LineBreak {
|
||||
LineBreakCtrlSym OptionalArgument?
|
||||
}
|
||||
}
|
||||
|
||||
textBase {
|
||||
( Command
|
||||
| DollarMath
|
||||
| BracketMath
|
||||
| ParenMath
|
||||
| NewLine
|
||||
| Normal
|
||||
| Whitespace
|
||||
| Ampersand
|
||||
| Tilde
|
||||
)
|
||||
}
|
||||
|
||||
textWithBrackets {
|
||||
( textBase
|
||||
| OpenBracket
|
||||
| CloseBracket
|
||||
)
|
||||
}
|
||||
|
||||
textWithEnvironmentsAndBlankLines {
|
||||
( BlankLine
|
||||
| KnownEnvironment
|
||||
| Environment
|
||||
| textWithBrackets
|
||||
)
|
||||
}
|
||||
|
||||
textWithGroupsEnvironmentsAndBlankLines {
|
||||
textWithEnvironmentsAndBlankLines
|
||||
| Group<Text>
|
||||
}
|
||||
|
||||
Content<Element> {
|
||||
Element
|
||||
}
|
||||
|
||||
SectioningCommand<Command> {
|
||||
Command optionalWhitespace? "*"? optionalWhitespace? OptionalArgument? optionalWhitespace? SectioningArgument
|
||||
}
|
||||
|
||||
// Template for one level of the sectioning hierarchy: the sectioning command
// given by `Command` (with its star/optional/title arguments), followed by a
// Content node holding any mix of plain section text and the sectioning
// levels passed in as `Next`. The `!section` marker refers to the `section`
// entry of the @precedence block.
documentSection<Command, Next> {
|
||||
SectioningCommand<Command> Content<(sectionText | !section Next)*>
|
||||
}
|
||||
Book[@isGroup="$Section"] { documentSection<BookCtrlSeq, Part | Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
|
||||
Part[@isGroup="$Section"] { documentSection<PartCtrlSeq, Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
|
||||
Chapter[@isGroup="$Section"] { documentSection<ChapterCtrlSeq, Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
|
||||
Section[@isGroup="$Section"] { documentSection<SectionCtrlSeq, SubSection | SubSubSection | Paragraph | SubParagraph> }
|
||||
SubSection[@isGroup="$Section"] { documentSection<SubSectionCtrlSeq, SubSubSection | Paragraph | SubParagraph> }
|
||||
SubSubSection[@isGroup="$Section"] { documentSection<SubSubSectionCtrlSeq, Paragraph | SubParagraph> }
|
||||
Paragraph[@isGroup="$Section"] { documentSection<ParagraphCtrlSeq, SubParagraph> }
|
||||
SubParagraph[@isGroup="$Section"] { SectioningCommand<SubParagraphCtrlSeq> Content<sectionText*> }
|
||||
|
||||
sectioningCommand {
|
||||
Book | Part | Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph
|
||||
}
|
||||
|
||||
sectionText {
|
||||
!section (
|
||||
textWithGroupsEnvironmentsAndBlankLines
|
||||
)+
|
||||
}
|
||||
|
||||
Text {
|
||||
( sectionText
|
||||
| sectioningCommand)+
|
||||
}
|
||||
|
||||
LongArg {
|
||||
( textWithBrackets
|
||||
| NonEmptyGroup<LongArg>
|
||||
| KnownEnvironment
|
||||
| Environment
|
||||
| BlankLine
|
||||
| "#" // macro character
|
||||
| "_" | "^" // other math chars
|
||||
)*
|
||||
}
|
||||
|
||||
ShortTextArgument {
|
||||
OpenBrace ShortArg CloseBrace
|
||||
}
|
||||
|
||||
ShortArg {
|
||||
( textWithBrackets
|
||||
| NonEmptyGroup<ShortArg>
|
||||
| "#" // macro character
|
||||
| "_" | "^" // other math chars
|
||||
)*
|
||||
}
|
||||
|
||||
ShortOptionalArg {
|
||||
( textBase
|
||||
| NonEmptyGroup<ShortOptionalArg>
|
||||
| "#" // macro character
|
||||
| "_" // underscore is used in some parameter names
|
||||
)*
|
||||
}
|
||||
|
||||
TikzPictureContent { /// same as Text but with added allowed characters
|
||||
( textWithEnvironmentsAndBlankLines
|
||||
| NonEmptyGroup<TikzPictureContent>
|
||||
| "#" // macro character
|
||||
| "_" | "^" // other math chars
|
||||
)+
|
||||
}
|
||||
|
||||
DefinitionFragment {
|
||||
( DefinitionFragmentCommand
|
||||
| Begin
|
||||
| End
|
||||
| Group<DefinitionFragment>
|
||||
| Dollar
|
||||
| OpenParenCtrlSym
|
||||
| CloseParenCtrlSym
|
||||
| OpenBracketCtrlSym
|
||||
| CloseBracketCtrlSym
|
||||
| LeftCtrlSeq
|
||||
| RightCtrlSeq
|
||||
| BlankLine
|
||||
| NewLine
|
||||
| Normal
|
||||
| Whitespace
|
||||
| OpenBracket
|
||||
| CloseBracket
|
||||
| "#" // macro character
|
||||
| Ampersand // for tables
|
||||
| Tilde // unbreakable space
|
||||
| "_" | "^" // other math chars
|
||||
| SectioningCommand<
|
||||
BookCtrlSeq |
|
||||
PartCtrlSeq |
|
||||
ChapterCtrlSeq |
|
||||
SectionCtrlSeq |
|
||||
SubSectionCtrlSeq |
|
||||
SubSubSectionCtrlSeq |
|
||||
ParagraphCtrlSeq |
|
||||
SubParagraphCtrlSeq
|
||||
>
|
||||
)+
|
||||
}
|
||||
|
||||
DefinitionFragmentArgument {
|
||||
OpenBrace DefinitionFragment? CloseBrace
|
||||
}
|
||||
|
||||
DefinitionFragmentCommand {
|
||||
KnownCommand<TextArgument>
|
||||
| DefinitionFragmentUnknownCommand { genericUnknownCommandWithOptionalArguments<DefinitionFragmentArgument, OptionalArgument> }
|
||||
| KnownCtrlSym
|
||||
}
|
||||
|
||||
KnownEnvironment {
|
||||
( DocumentEnvironment
|
||||
| TabularEnvironment
|
||||
| EquationEnvironment
|
||||
| EquationArrayEnvironment
|
||||
| VerbatimEnvironment
|
||||
| TikzPictureEnvironment
|
||||
| FigureEnvironment
|
||||
| ListEnvironment
|
||||
| TableEnvironment
|
||||
)
|
||||
}
|
||||
|
||||
// Opening part of an environment: the Begin token, the braced environment
// name (matched by the `name` parameter), an optional bracketed argument and
// then any number of braced text arguments.
BeginEnv<name> {
|
||||
Begin
|
||||
EnvNameGroup<name>
|
||||
OptionalArgument?
|
||||
(!argument TextArgument)*
|
||||
}
|
||||
|
||||
EndEnv<name> {
|
||||
End
|
||||
EnvNameGroup<name>
|
||||
}
|
||||
|
||||
DocumentEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<DocumentEnvName>
|
||||
Content<Text>
|
||||
EndEnv<DocumentEnvName>
|
||||
(TrailingWhitespaceOnly | TrailingContent)?
|
||||
}
|
||||
|
||||
TabularContent {
|
||||
(textWithGroupsEnvironmentsAndBlankLines)*
|
||||
}
|
||||
|
||||
TabularEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<TabularEnvName>
|
||||
Content<TabularContent>
|
||||
EndEnv<TabularEnvName>
|
||||
}
|
||||
|
||||
TableEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<TableEnvName>
|
||||
Content<Text>
|
||||
EndEnv<TableEnvName>
|
||||
}
|
||||
|
||||
EquationEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<EquationEnvName>
|
||||
Content<Math?>
|
||||
EndEnv<EquationEnvName>
|
||||
}
|
||||
|
||||
EquationArrayEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<EquationArrayEnvName>
|
||||
Content<Math?>
|
||||
EndEnv<EquationArrayEnvName>
|
||||
}
|
||||
|
||||
VerbatimEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<VerbatimEnvName>
|
||||
Content<VerbatimContent>
|
||||
EndEnv<VerbatimEnvName>
|
||||
}
|
||||
|
||||
TikzPictureEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<TikzPictureEnvName>
|
||||
Content<TikzPictureContent>
|
||||
EndEnv<TikzPictureEnvName>
|
||||
}
|
||||
|
||||
FigureEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<FigureEnvName>
|
||||
Content<Text>
|
||||
EndEnv<FigureEnvName>
|
||||
}
|
||||
|
||||
ListEnvironment[@isGroup="$Environment"] {
|
||||
BeginEnv<ListEnvName>
|
||||
Content<Text>
|
||||
EndEnv<ListEnvName>
|
||||
}
|
||||
|
||||
EnvNameGroup<name> {
|
||||
OpenBrace name CloseBrace
|
||||
}
|
||||
|
||||
Environment[@isGroup="$Environment"] {
|
||||
BeginEnv<EnvName?>
|
||||
Content<Text>
|
||||
EndEnv<EnvName?>
|
||||
}
|
||||
|
||||
Group<GroupContent> {
|
||||
OpenBrace GroupContent? CloseBrace
|
||||
}
|
||||
|
||||
NonEmptyGroup<GroupContent> {
|
||||
OpenBrace GroupContent CloseBrace
|
||||
}
|
||||
|
||||
/// MATH MODE
|
||||
|
||||
DollarMath[@isGroup="$MathContainer"] {
|
||||
Dollar (InlineMath | DisplayMath) Dollar
|
||||
}
|
||||
|
||||
InlineMath {
|
||||
Math
|
||||
}
|
||||
|
||||
DisplayMath {
|
||||
Dollar Math? Dollar
|
||||
}
|
||||
|
||||
|
||||
OpenParenMath[closedBy=CloseParenMath] {
|
||||
OpenParenCtrlSym
|
||||
}
|
||||
|
||||
CloseParenMath[openedBy=OpenParenMath] {
|
||||
CloseParenCtrlSym
|
||||
}
|
||||
|
||||
// alternative syntax \( math \) for inline math, it is the same as $ math $
|
||||
ParenMath[@isGroup="$MathContainer"] {
|
||||
OpenParenMath
|
||||
Math?
|
||||
CloseParenMath
|
||||
}
|
||||
|
||||
OpenBracketMath[closedBy=CloseBracketMath] {
|
||||
OpenBracketCtrlSym
|
||||
}
|
||||
|
||||
CloseBracketMath[openedBy=OpenBracketMath] {
|
||||
CloseBracketCtrlSym
|
||||
}
|
||||
|
||||
// alternative syntax \[ math \] for display math, it is the same as $$ math $$
|
||||
BracketMath[@isGroup="$MathContainer"] {
|
||||
OpenBracketMath
|
||||
Math?
|
||||
CloseBracketMath
|
||||
}
|
||||
|
||||
// FIXME: we should have separate math modes for inline and display math,
|
||||
// because display math can contain blank lines while inline math cannot.
|
||||
|
||||
Math {
|
||||
( MathCommand
|
||||
| Group<Math>
|
||||
| MathDelimitedGroup
|
||||
| MathSpecialChar
|
||||
| Number
|
||||
| NewLine
|
||||
| Whitespace
|
||||
| KnownEnvironment
|
||||
| Environment
|
||||
| MathChar
|
||||
| OpenBracket
|
||||
| CloseBracket
|
||||
| Ampersand
|
||||
| Tilde
|
||||
)+
|
||||
}
|
||||
|
||||
|
||||
MathCommand {
|
||||
KnownCommand<MathArgument>
|
||||
| MathUnknownCommand { genericUnknownCommand<MathArgument> }
|
||||
| KnownCtrlSym
|
||||
}
|
||||
|
||||
@external tokens argumentListTokenizer from "./tokens.mjs" {
|
||||
hasMoreArguments,
|
||||
endOfArguments
|
||||
}
|
||||
|
||||
@external tokens argumentListWithOptionalTokenizer from "./tokens.mjs" {
|
||||
hasMoreArgumentsOrOptionals,
|
||||
endOfArgumentsAndOptionals
|
||||
}
|
||||
|
||||
genericUnknownCommand<ArgumentType> {
|
||||
CtrlSeq (hasMoreArguments optionalWhitespace? ArgumentType)* endOfArguments
|
||||
| CtrlSym
|
||||
}
|
||||
|
||||
genericUnknownCommandWithOptionalArguments<ArgumentType, OptionalArgumentType> {
|
||||
CtrlSeq (hasMoreArgumentsOrOptionals optionalWhitespace? (ArgumentType | OptionalArgumentType))* endOfArgumentsAndOptionals
|
||||
| CtrlSym
|
||||
}
|
||||
|
||||
MathArgument {
|
||||
OpenBrace Math? CloseBrace
|
||||
}
|
||||
|
||||
MathDelimitedGroup {
|
||||
MathOpening Math? MathClosing
|
||||
}
|
||||
|
||||
// FIXME: we have the same problem with specialize on \left,\right as the delimiters
|
||||
MathOpening {
|
||||
LeftCtrlSeq optionalWhitespace? MathDelimiter
|
||||
}
|
||||
|
||||
MathClosing {
|
||||
RightCtrlSeq optionalWhitespace? MathDelimiter
|
||||
}
|
||||
|
||||
MathDelimiter {
|
||||
// Allowed delimiters, from the LaTeX manual, table 3.10
|
||||
"/" | "|" | "(" | ")" | "[" | "]" |
|
||||
"\\{" | "\\}" | "\\|" |
|
||||
"\\lfloor" | "\\rfloor" |
|
||||
"\\lceil" | "\\rceil" |
|
||||
"\\langle" | "\\rangle" |
|
||||
"\\backslash" | "\\uparrow" |
|
||||
"\\Uparrow" | "\\Downarrow" |
|
||||
"\\updownarrow" | "\\Updownarrow" |
|
||||
"\\downarrow" | "\\lvert" |
|
||||
"\\lVert" | "\\rVert" |
|
||||
"\\rvert" | "\\vert" | "\\Vert" |
|
||||
"\\lbrace" | "\\rbrace" |
|
||||
"\\lbrack" | "\\rbrack" |
|
||||
// Also allow "." (the null delimiter, as in \left. or \right.)
|
||||
"."
|
||||
}
|
||||
|
||||
// NOTE: precedence works differently for rules and tokens: in a rule
|
||||
// you have to give a specifier !foo which is defined in the @precedence
|
||||
// block here.
|
||||
|
||||
@precedence {
|
||||
section @left,
|
||||
argument @left // make CtrlSeq arguments left associative
|
||||
}
|
||||
@@ -0,0 +1,747 @@
|
||||
/* Hand-written tokenizer for LaTeX. */
|
||||
|
||||
import { ExternalTokenizer, ContextTracker } from '@lezer/lr'
|
||||
|
||||
import {
|
||||
LiteralArgContent,
|
||||
SpaceDelimitedLiteralArgContent,
|
||||
VerbContent,
|
||||
VerbatimContent,
|
||||
LstInlineContent,
|
||||
Begin,
|
||||
End,
|
||||
KnownEnvironment,
|
||||
Csname,
|
||||
TrailingWhitespaceOnly,
|
||||
TrailingContent,
|
||||
RefCtrlSeq,
|
||||
RefStarrableCtrlSeq,
|
||||
CiteCtrlSeq,
|
||||
CiteStarrableCtrlSeq,
|
||||
LabelCtrlSeq,
|
||||
MathTextCtrlSeq,
|
||||
HboxCtrlSeq,
|
||||
TitleCtrlSeq,
|
||||
AuthorCtrlSeq,
|
||||
AffilCtrlSeq,
|
||||
AffiliationCtrlSeq,
|
||||
DateCtrlSeq,
|
||||
DocumentClassCtrlSeq,
|
||||
UsePackageCtrlSeq,
|
||||
HrefCtrlSeq,
|
||||
UrlCtrlSeq,
|
||||
VerbCtrlSeq,
|
||||
LstInlineCtrlSeq,
|
||||
IncludeGraphicsCtrlSeq,
|
||||
CaptionCtrlSeq,
|
||||
DefCtrlSeq,
|
||||
LetCtrlSeq,
|
||||
LeftCtrlSeq,
|
||||
RightCtrlSeq,
|
||||
NewCommandCtrlSeq,
|
||||
RenewCommandCtrlSeq,
|
||||
NewEnvironmentCtrlSeq,
|
||||
RenewEnvironmentCtrlSeq,
|
||||
DocumentEnvName,
|
||||
TabularEnvName,
|
||||
EquationEnvName,
|
||||
EquationArrayEnvName,
|
||||
VerbatimEnvName,
|
||||
TikzPictureEnvName,
|
||||
FigureEnvName,
|
||||
OpenParenCtrlSym,
|
||||
CloseParenCtrlSym,
|
||||
OpenBracketCtrlSym,
|
||||
CloseBracketCtrlSym,
|
||||
LineBreakCtrlSym,
|
||||
// Sectioning commands
|
||||
BookCtrlSeq,
|
||||
PartCtrlSeq,
|
||||
ChapterCtrlSeq,
|
||||
SectionCtrlSeq,
|
||||
SubSectionCtrlSeq,
|
||||
SubSubSectionCtrlSeq,
|
||||
ParagraphCtrlSeq,
|
||||
SubParagraphCtrlSeq,
|
||||
InputCtrlSeq,
|
||||
IncludeCtrlSeq,
|
||||
ItemCtrlSeq,
|
||||
NewTheoremCtrlSeq,
|
||||
TheoremStyleCtrlSeq,
|
||||
BibliographyCtrlSeq,
|
||||
BibliographyStyleCtrlSeq,
|
||||
CenteringCtrlSeq,
|
||||
ListEnvName,
|
||||
MaketitleCtrlSeq,
|
||||
TextColorCtrlSeq,
|
||||
ColorBoxCtrlSeq,
|
||||
HLineCtrlSeq,
|
||||
TopRuleCtrlSeq,
|
||||
MidRuleCtrlSeq,
|
||||
BottomRuleCtrlSeq,
|
||||
TableEnvName,
|
||||
MultiColumnCtrlSeq,
|
||||
ParBoxCtrlSeq,
|
||||
// Marker for end of argument lists
|
||||
endOfArguments,
|
||||
hasMoreArguments,
|
||||
hasMoreArgumentsOrOptionals,
|
||||
endOfArgumentsAndOptionals,
|
||||
TextBoldCtrlSeq,
|
||||
TextItalicCtrlSeq,
|
||||
TextSmallCapsCtrlSeq,
|
||||
TextTeletypeCtrlSeq,
|
||||
TextMediumCtrlSeq,
|
||||
TextSansSerifCtrlSeq,
|
||||
TextSuperscriptCtrlSeq,
|
||||
TextSubscriptCtrlSeq,
|
||||
TextStrikeOutCtrlSeq,
|
||||
EmphasisCtrlSeq,
|
||||
UnderlineCtrlSeq,
|
||||
SetLengthCtrlSeq,
|
||||
} from './latex.terms.mjs'
|
||||
|
||||
const MAX_ARGUMENT_LOOKAHEAD = 100
|
||||
|
||||
/**
 * Decide whether a character code may be part of an environment name.
 *
 * Accepted characters are the ASCII letters, the ASCII digits and the three
 * punctuation characters `*`, `+` and `@`.
 *
 * @param {number} ch - character code to classify
 * @returns {boolean} true when `ch` is an accepted name character
 */
function nameChar(ch) {
  // '*' (42), '+' (43) and '@' (64)
  if (ch === 42 || ch === 43 || ch === 64) {
    return true
  }
  const isDigit = ch >= 48 && ch <= 57
  const isUpper = ch >= 65 && ch <= 90
  const isLower = ch >= 97 && ch <= 122
  return isUpper || isLower || isDigit
}
|
||||
|
||||
/**
 * Test whether a character code is an ASCII letter, i.e. matches [a-zA-Z].
 *
 * @param {number} ch - character code to classify
 * @returns {boolean} true for 'A'-'Z' and 'a'-'z', false otherwise
 */
function alphaChar(ch) {
  const isUpper = ch >= 65 && ch <= 90
  if (isUpper) {
    return true
  }
  return ch >= 97 && ch <= 122
}
|
||||
|
||||
// Single-entry memo for envNameAfter: the input object that was scanned last,
// the absolute position the scan started from, and the name found there.
let cachedName = null
let cachedInput = null
let cachedPos = 0

/**
 * Read the environment name out of a `{name` sequence that starts `offset`
 * characters after the current position of `input`.
 *
 * The most recent successful scan is memoised, keyed on the input object and
 * the absolute start position, so asking again for the same spot does not
 * re-read the input.
 *
 * @param {{pos: number, peek: function(number): number}} input - stream to
 *   look ahead in; it is only peeked, never advanced
 * @param {number} offset - distance from `input.pos` to the expected `{`
 * @returns {string|null|undefined} the run of name characters after the
 *   brace; `null` when the brace is not followed by any name character;
 *   `undefined` when there is no `{` at `offset` (this result is not cached)
 */
function envNameAfter(input, offset) {
  const startPos = input.pos + offset
  const isCached = cachedInput === input && cachedPos === startPos
  if (isCached) {
    return cachedName
  }

  const hasOpenBrace = input.peek(offset) === '{'.charCodeAt(0)
  if (!hasOpenBrace) {
    return
  }

  // Collect name characters following the brace; any other code (including
  // whatever peek reports past the end of the input) stops the scan.
  let collected = ''
  let cursor = offset + 1
  let code = input.peek(cursor)
  while (nameChar(code)) {
    collected += String.fromCharCode(code)
    cursor += 1
    code = input.peek(cursor)
  }

  cachedInput = input
  cachedPos = startPos
  cachedName = collected || null
  return cachedName
}
|
||||
|
||||
/**
 * One entry in a linked stack of contexts, each carrying a name and a link to
 * the entry below it. Must be invoked with `new`.
 *
 * `hash` starts from the parent's hash (0 when there is no parent) and folds
 * in every UTF-16 code unit of `name`.
 *
 * @param {string} name - name stored on this entry (may be the empty string)
 * @param {?{hash: number}} parent - enclosing entry, or a falsy value when
 *   this entry is the outermost one
 */
function ElementContext(name, parent) {
  this.name = name
  this.parent = parent

  let hash = parent ? parent.hash : 0
  for (let index = 0; index < name.length; index += 1) {
    const code = name.charCodeAt(index)
    hash += (hash << 4) + code + (code << 8)
  }
  this.hash = hash
}
|
||||
|
||||
/**
 * Context tracker that maintains a stack of ElementContext entries while
 * parsing: an entry is pushed whenever a Begin term is seen and the top entry
 * is dropped whenever a KnownEnvironment is reduced.
 *
 * NOTE(review): `shift` skips `'\\begin'.length` characters before looking
 * for the `{`, whereas `reuse` looks at offset 0 -- confirm where lezer
 * positions `input` for reused nodes.
 * NOTE(review): only KnownEnvironment pops the stack; confirm whether the
 * grammar's generic Environment rule is meant to pop as well.
 */
export const elementContext = new ContextTracker({
  // The initial context is null.
  start: null,

  // Push an entry named after the environment that follows `\begin`
  // (empty name when none can be read); every other term keeps the context.
  shift(context, term, stack, input) {
    if (term !== Begin) {
      return context
    }
    const envName = envNameAfter(input, '\\begin'.length) || ''
    return new ElementContext(envName, context)
  },

  // Pop back to the parent once a KnownEnvironment has been reduced.
  reduce(context, term) {
    if (term === KnownEnvironment && context) {
      return context.parent
    }
    return context
  },

  // Same push as `shift`, applied when a Begin node is reused.
  reuse(context, node, _stack, input) {
    if (node.type.id !== Begin) {
      return context
    }
    const envName = envNameAfter(input, 0) || ''
    return new ElementContext(envName, context)
  },

  // A null context hashes to 0; otherwise use the entry's precomputed hash.
  hash(context) {
    return context ? context.hash : 0
  },

  strict: false,
})
|
||||
|
||||
// tokenizer for \verb|...| commands
//
// Skips an optional `*`, takes the following character as the delimiter
// (whitespace and `*` are rejected) and consumes input up to and including
// the next occurrence of that delimiter. No token is produced when the end
// of the file or a newline is reached first.
export const verbTokenizer = new ExternalTokenizer(
  (input, stack) => {
    if (input.next === '*'.charCodeAt(0)) {
      input.advance()
    }
    const delimiter = input.next
    if (delimiter === -1) return // hit end of file
    const isInvalidDelimiter = /\s|\*/.test(String.fromCharCode(delimiter))
    if (isInvalidDelimiter) return
    input.advance()

    let current
    do {
      current = input.next
      if (current === -1 || current === CHAR_NEWLINE) return
      input.advance()
    } while (current !== delimiter)

    return input.acceptToken(VerbContent)
  },
  { contextual: false }
)
|
||||
|
||||
// tokenizer for \lstinline|...| commands
//
// The first character is the opening delimiter (whitespace is rejected). An
// opening `{` is closed by `}`; any other delimiter closes itself. Input is
// consumed up to and including the closing delimiter. No token is produced
// when the end of the file or a newline is reached first.
export const lstinlineTokenizer = new ExternalTokenizer(
  (input, stack) => {
    const opening = input.next
    if (opening === -1) return // hit end of file
    if (/\s/.test(String.fromCharCode(opening))) {
      return // invalid delimiter
    }
    const delimiter =
      opening === CHAR_OPEN_BRACE ? CHAR_CLOSE_BRACE : opening
    input.advance()

    let current
    do {
      current = input.next
      if (current === -1 || current === CHAR_NEWLINE) return
      input.advance()
    } while (current !== delimiter)

    return input.acceptToken(LstInlineContent)
  },
  { contextual: false }
)
|
||||
|
||||
// Tests whether the input, starting `offset` characters ahead of the current
// position, spells out the string `expected`.
//
// Code units are compared numerically rather than via String.fromCharCode:
// the end-of-input sentinel (-1) would otherwise be coerced to U+FFFF and
// could spuriously match, and no temporary string is built per character.
//
// input: object exposing peek(offset), returning a code unit or -1
// expected: the string to look for (an empty string always matches)
// offset: where to start matching, relative to the current position
const matchForward = (input, expected, offset = 0) => {
  for (let i = 0; i < expected.length; i++) {
    if (input.peek(offset + i) !== expected.charCodeAt(i)) {
      return false
    }
  }
  return true
}
|
||||
|
||||
// tokenizer for \begin{verbatim}...\end{verbatim} environments
//
// Emits everything up to (not including) the `\end{<name>}` that closes the
// environment recorded in the current context, or up to the end of the file
// when no such delimiter is found.
export const verbatimTokenizer = new ExternalTokenizer(
  (input, stack) => {
    const context = stack.context
    // The context tracker starts out with a null context; without an
    // enclosing environment there is no delimiter to scan for, so emit no
    // token instead of throwing on `null.name`.
    if (!context) return
    const delimiter = '\\end{' + context.name + '}'
    for (let offset = 0; ; offset++) {
      const next = input.peek(offset)
      if (next === -1 || matchForward(input, delimiter, offset)) {
        return input.acceptToken(VerbatimContent, offset)
      }
    }
  },
  { contextual: false }
)
|
||||
|
||||
// tokenizer for \href{...} and similar commands
//
// Emits everything up to (not including) the next closing brace, or up to
// the end of the file when there is none.
export const literalArgTokenizer = new ExternalTokenizer(
  input => {
    let length = 0
    let next = input.peek(length)
    while (next !== -1 && next !== CHAR_CLOSE_BRACE) {
      length++
      next = input.peek(length)
    }
    return input.acceptToken(LiteralArgContent, length)
  },
  { contextual: false }
)
|
||||
|
||||
// tokenizer for literal content delimited by whitespace, such as in `\input foo.tex`
//
// Emits everything up to (not including) the next space or newline, or up to
// the end of the file.
// NOTE(review): a tab does not end the token here — confirm that is intended.
export const spaceDelimitedLiteralArgTokenizer = new ExternalTokenizer(
  input => {
    const endsToken = ch =>
      ch === -1 || ch === CHAR_SPACE || ch === CHAR_NEWLINE
    let length = 0
    while (!endsToken(input.peek(length))) {
      length++
    }
    return input.acceptToken(SpaceDelimitedLiteralArgContent, length)
  },
  { contextual: false }
)
|
||||
|
||||
// Helper that returns the character code of the first character of `s`
// (NaN for an empty string).
function _char(s) {
  const code = s.charCodeAt(0)
  return code
}
|
||||
|
||||
// Character codes the tokenizers compare input against.
const CHAR_BACKSLASH = _char('\\') // 92
const CHAR_OPEN_BRACE = _char('{') // 123
const CHAR_OPEN_BRACKET = _char('[') // 91
const CHAR_CLOSE_BRACE = _char('}') // 125
const CHAR_TAB = _char('\t') // 9
const CHAR_SPACE = _char(' ') // 32
const CHAR_NEWLINE = _char('\n') // 10
|
||||
|
||||
// Builds a zero-length lookahead tokenizer. It peeks ahead at most
// MAX_ARGUMENT_LOOKAHEAD characters, skipping spaces and tabs, and hands each
// other character to `getToken`. The first truthy token returned is accepted
// without consuming any input; if none is returned within the limit, no
// token is emitted.
const lookaheadTokenizer = getToken => {
  const tokenize = input => {
    for (let distance = 0; distance < MAX_ARGUMENT_LOOKAHEAD; distance++) {
      const next = input.peek(distance)
      const isBlank = next === CHAR_SPACE || next === CHAR_TAB
      if (!isBlank) {
        const token = getToken(next)
        if (token) {
          input.acceptToken(token)
          return
        }
      }
    }
  }
  return new ExternalTokenizer(tokenize, { contextual: false, fallback: true })
}
|
||||
|
||||
// Lookahead marker: hasMoreArguments when the next non-blank character is an
// opening brace, endOfArguments otherwise.
export const argumentListTokenizer = lookaheadTokenizer(next =>
  next === CHAR_OPEN_BRACE ? hasMoreArguments : endOfArguments
)
|
||||
|
||||
// Lookahead marker: hasMoreArgumentsOrOptionals when the next non-blank
// character is an opening brace or bracket, endOfArgumentsAndOptionals
// otherwise.
export const argumentListWithOptionalTokenizer = lookaheadTokenizer(next => {
  const opensArgument = next === CHAR_OPEN_BRACE || next === CHAR_OPEN_BRACKET
  return opensArgument
    ? hasMoreArgumentsOrOptionals
    : endOfArgumentsAndOptionals
})
|
||||
|
||||
const CHAR_AT_SYMBOL = _char('@')

// Tokenizer for control sequence names that may contain @ signs,
// \\[a-zA-Z@]+
//
// Only input starting with a backslash is considered; the token covers the
// backslash plus the following run of letters and @ signs.
// NOTE(review): a backslash that is not followed by any letter or @ still
// produces a one-character Csname token, although the pattern above asks for
// at least one name character — confirm this is intended.
export const csnameTokenizer = new ExternalTokenizer((input, stack) => {
  // reject end of file and anything not starting with a backslash,
  // we only accept control sequences
  if (input.peek(0) !== CHAR_BACKSLASH) {
    return
  }
  // extend the token while we keep seeing csname characters; the end-of-file
  // marker is not one, so it stops the scan as well
  let length = 1
  for (;;) {
    const next = input.peek(length)
    const isCsnameChar = alphaChar(next) || next === CHAR_AT_SYMBOL
    if (!isCsnameChar) break
    length++
  }
  // accept the content as a valid control sequence
  return input.acceptToken(Csname, length)
})
|
||||
|
||||
// The characters of `\end{document}` in reverse order, so that entry N is
// the character expected N + 1 positions before the current one.
const END_DOCUMENT_MARK = '\\end{document}'.split('').reverse()

// Tokenizer for whatever follows `\end{document}`.
//
// Emits nothing at end of file or when the input is not immediately preceded
// by `\end{document}`. Otherwise it consumes the rest of the file and emits
// TrailingWhitespaceOnly when only spaces and newlines remain, or
// TrailingContent when anything else is found.
export const trailingContentTokenizer = new ExternalTokenizer(
  (input, stack) => {
    if (input.next === -1) return // no trailing content

    // Look back for end-document mark, bail out on the first mismatch
    const followsEndDocument = END_DOCUMENT_MARK.every(
      (char, index) => String.fromCharCode(input.peek(-(index + 1))) === char
    )
    if (!followsEndDocument) {
      return
    }

    const isBlank = ch => ch === CHAR_SPACE || ch === CHAR_NEWLINE
    while (isBlank(input.next)) {
      if (input.advance() === -1) {
        return input.acceptToken(TrailingWhitespaceOnly) // trailing whitespace only
      }
    }

    // accept all the content up to the end of the document
    let next
    do {
      next = input.advance()
    } while (next !== -1)
    return input.acceptToken(TrailingContent)
  }
)
|
||||
|
||||
// Control sequences that specializeCtrlSeq turns into RefCtrlSeq.
//
// `\vref` is deliberately not listed here: it is part of
// refStarrableCommands, and because specializeCtrlSeq consults this set
// first, listing it in both made the starrable entry unreachable.
const refCommands = new Set([
  '\\fullref',
  '\\Vref',
  '\\autopageref',
  '\\autoref',
  '\\eqref',
  '\\labelcpageref',
  '\\labelcref',
  '\\lcnamecref',
  '\\lcnamecrefs',
  '\\namecref',
  '\\nameCref',
  '\\namecrefs',
  '\\nameCrefs',
  '\\thnameref',
  '\\thref',
  '\\titleref',
  '\\vrefrange',
  '\\Crefrange',
  // NOTE(review): looks like a truncated duplicate of `\Crefrange`; kept so
  // that existing behaviour does not change — confirm and remove
  '\\Crefrang',
  '\\fref',
  '\\pref',
  '\\tref',
  '\\Aref',
  '\\Bref',
  '\\Pref',
  '\\Sref',
  '\\nameref',
])
|
||||
|
||||
// Control sequences that specializeCtrlSeq turns into RefStarrableCtrlSeq.
const refStarrableCommands = new Set([
  '\\vpageref',
  '\\vref',
  '\\zcpageref',
  '\\zcref',
  '\\zfullref',
  '\\zref',
  '\\zvpageref',
  '\\zvref',
  '\\cref',
  '\\Cref',
  '\\pageref',
  '\\ref',
  '\\Ref',
  '\\subref',
  '\\zpageref',
  '\\ztitleref',
  '\\vpagerefrange',
  '\\zvpagerefrange',
  '\\zvrefrange',
  '\\crefrange',
])
|
||||
|
||||
// Control sequences that specializeCtrlSeq turns into CiteCtrlSeq.
const citeCommands = new Set([
  '\\autocites',
  '\\Autocites',
  '\\Cite',
  '\\citeA',
  '\\citealp',
  '\\Citealp',
  '\\citealt',
  '\\Citealt',
  '\\citeauthorNP',
  '\\citeauthorp',
  '\\Citeauthorp',
  '\\citeauthort',
  '\\Citeauthort',
  '\\citeNP',
  '\\citenum',
  '\\citen',
  '\\citeonline',
  '\\cites',
  '\\Cites',
  '\\citeurl',
  '\\citeyearpar',
  '\\defcitealias',
  '\\fnotecite',
  '\\footcite',
  '\\footcitetext',
  '\\footfullcite',
  '\\footnotecites',
  '\\Footnotecites',
  '\\fullcite',
  '\\fullciteA',
  '\\fullciteauthor',
  '\\fullciteauthorNP',
  '\\maskcite',
  '\\maskciteA',
  '\\maskcitealp',
  '\\maskCitealp',
  '\\maskcitealt',
  '\\maskCitealt',
  '\\maskciteauthor',
  '\\maskciteauthorNP',
  '\\maskciteauthorp',
  '\\maskCiteauthorp',
  '\\maskciteauthort',
  '\\maskCiteauthort',
  '\\maskciteNP',
  '\\maskcitenum',
  '\\maskcitep',
  '\\maskCitep',
  '\\maskcitepalias',
  '\\maskcitet',
  '\\maskCitet',
  '\\maskcitetalias',
  '\\maskciteyear',
  '\\maskciteyearNP',
  '\\maskciteyearpar',
  '\\maskfullcite',
  '\\maskfullciteA',
  '\\maskfullciteauthor',
  '\\maskfullciteauthorNP',
  '\\masknocite',
  '\\maskshortcite',
  '\\maskshortciteA',
  '\\maskshortciteauthor',
  '\\maskshortciteauthorNP',
  '\\maskshortciteNP',
  '\\mautocite',
  '\\Mautocite',
  '\\mcite',
  '\\Mcite',
  '\\mfootcite',
  '\\mfootcitetext',
  '\\mparencite',
  '\\Mparencite',
  '\\msupercite',
  '\\mtextcite',
  '\\Mtextcite',
  '\\nocite',
  '\\nocitemeta',
  '\\notecite',
  '\\Parencite',
  '\\parencites',
  '\\Parencites',
  '\\pnotecite',
  '\\shortcite',
  '\\shortciteA',
  '\\shortciteauthor',
  '\\shortciteauthorNP',
  '\\shortciteNP',
  '\\smartcite',
  '\\Smartcite',
  '\\smartcites',
  '\\Smartcites',
  '\\supercite',
  '\\supercites',
  '\\textcite',
  '\\Textcite',
  '\\textcites',
  '\\Textcites',
])
|
||||
|
||||
// Control sequences that specializeCtrlSeq turns into CiteStarrableCtrlSeq.
const citeStarredCommands = new Set([
  '\\cite',
  '\\citeauthor',
  '\\Citeauthor',
  '\\citedate',
  '\\citep',
  '\\citepalias',
  '\\Citep',
  '\\citetitle',
  '\\citeyear',
  '\\parencite',
  '\\citet',
  '\\citetalias',
  '\\autocite',
  '\\Autocite',
])
|
||||
|
||||
// Control sequences that specializeCtrlSeq turns into LabelCtrlSeq.
const labelCommands = new Set(['\\label', '\\thlabel', '\\zlabel'])
|
||||
|
||||
// Control sequences that specializeCtrlSeq turns into MathTextCtrlSeq.
const mathTextCommands = new Set(['\\text', '\\tag', '\\textrm', '\\intertext'])
|
||||
|
||||
// Control sequences that each map to a dedicated term. specializeCtrlSeq
// consults this table after the command sets above.
const otherKnowncommands = {
  '\\hbox': HboxCtrlSeq,
  '\\title': TitleCtrlSeq,
  '\\author': AuthorCtrlSeq,
  '\\affil': AffilCtrlSeq,
  '\\affiliation': AffiliationCtrlSeq,
  '\\date': DateCtrlSeq,
  '\\documentclass': DocumentClassCtrlSeq,
  '\\usepackage': UsePackageCtrlSeq,
  '\\href': HrefCtrlSeq,
  '\\url': UrlCtrlSeq,
  '\\verb': VerbCtrlSeq,
  '\\lstinline': LstInlineCtrlSeq,
  '\\includegraphics': IncludeGraphicsCtrlSeq,
  '\\caption': CaptionCtrlSeq,
  '\\def': DefCtrlSeq,
  '\\let': LetCtrlSeq,
  '\\left': LeftCtrlSeq,
  '\\right': RightCtrlSeq,
  '\\newcommand': NewCommandCtrlSeq,
  '\\renewcommand': RenewCommandCtrlSeq,
  '\\newenvironment': NewEnvironmentCtrlSeq,
  '\\renewenvironment': RenewEnvironmentCtrlSeq,
  '\\book': BookCtrlSeq,
  '\\part': PartCtrlSeq,
  '\\addpart': PartCtrlSeq,
  '\\chapter': ChapterCtrlSeq,
  '\\addchap': ChapterCtrlSeq,
  '\\section': SectionCtrlSeq,
  // KOMA-Script's unnumbered section command is spelled `\addsec`; the
  // misspelt `\addseq` key is kept for backward compatibility
  '\\addsec': SectionCtrlSeq,
  '\\addseq': SectionCtrlSeq,
  '\\subsection': SubSectionCtrlSeq,
  '\\subsubsection': SubSubSectionCtrlSeq,
  '\\paragraph': ParagraphCtrlSeq,
  '\\subparagraph': SubParagraphCtrlSeq,
  '\\input': InputCtrlSeq,
  '\\include': IncludeCtrlSeq,
  '\\item': ItemCtrlSeq,
  '\\centering': CenteringCtrlSeq,
  '\\newtheorem': NewTheoremCtrlSeq,
  '\\theoremstyle': TheoremStyleCtrlSeq,
  '\\bibliography': BibliographyCtrlSeq,
  '\\bibliographystyle': BibliographyStyleCtrlSeq,
  '\\maketitle': MaketitleCtrlSeq,
  '\\textcolor': TextColorCtrlSeq,
  '\\colorbox': ColorBoxCtrlSeq,
  '\\hline': HLineCtrlSeq,
  '\\toprule': TopRuleCtrlSeq,
  '\\midrule': MidRuleCtrlSeq,
  '\\bottomrule': BottomRuleCtrlSeq,
  '\\multicolumn': MultiColumnCtrlSeq,
  '\\parbox': ParBoxCtrlSeq,
  '\\textbf': TextBoldCtrlSeq,
  '\\textit': TextItalicCtrlSeq,
  '\\textsc': TextSmallCapsCtrlSeq,
  '\\texttt': TextTeletypeCtrlSeq,
  '\\textmd': TextMediumCtrlSeq,
  '\\textsf': TextSansSerifCtrlSeq,
  '\\textsuperscript': TextSuperscriptCtrlSeq,
  '\\textsubscript': TextSubscriptCtrlSeq,
  '\\sout': TextStrikeOutCtrlSeq,
  '\\emph': EmphasisCtrlSeq,
  '\\underline': UnderlineCtrlSeq,
  '\\setlength': SetLengthCtrlSeq,
}
|
||||
// specializer for control sequences
// return new tokens for specific control sequences
//
// `\begin` and `\end` are checked first, then the command sets in the order
// ref, starrable ref, cite, starrable cite, label, math text, and finally
// the otherKnowncommands table. Returns -1 when the name is not specialized.
export const specializeCtrlSeq = (name, terms) => {
  switch (name) {
    case '\\begin':
      return Begin
    case '\\end':
      return End
  }
  if (refCommands.has(name)) return RefCtrlSeq
  if (refStarrableCommands.has(name)) return RefStarrableCtrlSeq
  if (citeCommands.has(name)) return CiteCtrlSeq
  if (citeStarredCommands.has(name)) return CiteStarrableCtrlSeq
  if (labelCommands.has(name)) return LabelCtrlSeq
  if (mathTextCommands.has(name)) return MathTextCtrlSeq
  const term = otherKnowncommands[name]
  return term || -1
}
|
||||
|
||||
// Environment names that specializeEnvName turns into TabularEnvName.
const tabularEnvNames = new Set([
  'tabular',
  'xltabular',
  'tabularx',
  'longtable',
])
|
||||
|
||||
// Environment names that specializeEnvName turns into EquationEnvName.
const equationEnvNames = new Set([
  'equation',
  'equation*',
  'displaymath',
  'displaymath*',
  'math',
  'math*',
  'multline',
  'multline*',
  'matrix',
  'tikzcd',
])
|
||||
|
||||
// Environment names that specializeEnvName turns into EquationArrayEnvName.
const equationArrayEnvNames = new Set([
  'array',
  'eqnarray',
  'eqnarray*',
  'align',
  'align*',
  'alignat',
  'alignat*',
  'flalign',
  'flalign*',
  'gather',
  'gather*',
  'pmatrix',
  'pmatrix*',
  'bmatrix',
  'bmatrix*',
  'Bmatrix',
  'Bmatrix*',
  'vmatrix',
  'vmatrix*',
  'Vmatrix',
  'Vmatrix*',
  'smallmatrix',
  'smallmatrix*',
  'split',
  'split*',
  'gathered',
  'gathered*',
  'aligned',
  'aligned*',
  'alignedat',
  'alignedat*',
  'cases',
  'cases*',
  'dcases',
  'dcases*',
  'rcases',
  'rcases*',
  'IEEEeqnarray',
  'IEEEeqnarray*',
])
|
||||
|
||||
// Environment names that specializeEnvName turns into VerbatimEnvName.
const verbatimEnvNames = new Set([
  'verbatim',
  'boxedverbatim',
  'lstlisting',
  'minted',
  'Verbatim',
  'tcblisting',
  'codeexample',
  'comment',
])
|
||||
|
||||
// Environment names that each map to a dedicated term. specializeEnvName
// consults this table after the environment name sets above.
const otherKnownEnvNames = {
  document: DocumentEnvName,
  tikzpicture: TikzPictureEnvName,
  // figure-like environments
  figure: FigureEnvName,
  'figure*': FigureEnvName,
  subfigure: FigureEnvName,
  // list environments (see also `description` below)
  enumerate: ListEnvName,
  itemize: ListEnvName,
  table: TableEnvName,
  description: ListEnvName,
}
|
||||
|
||||
// specializer for environment names
//
// Checks the tabular, equation, equation-array and verbatim name sets in
// that order, then the otherKnownEnvNames table. Returns -1 when the name is
// not specialized.
export const specializeEnvName = (name, terms) => {
  if (tabularEnvNames.has(name)) {
    return TabularEnvName
  }
  if (equationEnvNames.has(name)) {
    return EquationEnvName
  }
  if (equationArrayEnvNames.has(name)) {
    return EquationArrayEnvName
  }
  if (verbatimEnvNames.has(name)) {
    return VerbatimEnvName
  }
  // otherKnownEnvNames is a plain object, so only consult its own keys:
  // a bare lookup would hand back inherited Object.prototype members for
  // names such as `constructor` or `toString` instead of -1.
  if (Object.prototype.hasOwnProperty.call(otherKnownEnvNames, name)) {
    return otherKnownEnvNames[name] || -1
  }
  return -1
}
|
||||
|
||||
// Control symbols that specializeCtrlSym maps to a dedicated term.
const otherKnownCtrlSyms = {
  // \( and \)
  '\\(': OpenParenCtrlSym,
  '\\)': CloseParenCtrlSym,
  // \[ and \]
  '\\[': OpenBracketCtrlSym,
  '\\]': CloseBracketCtrlSym,
  // \\
  '\\\\': LineBreakCtrlSym,
}
|
||||
|
||||
// specializer for control symbols
// returns the term registered in otherKnownCtrlSyms, or -1 when there is none
export const specializeCtrlSym = (name, terms) =>
  otherKnownCtrlSyms[name] || -1
|
||||
Reference in New Issue
Block a user