first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions
--- a/services/web/scripts/lezer-latex/benchmark.mjs
+++ b/services/web/scripts/lezer-latex/benchmark.mjs
@@ -0,0 +1,66 @@
+import { parser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
+
+import * as fs from 'node:fs'
+import * as path from 'node:path'
+import { fileURLToPath } from 'node:url'
+import minimist from 'minimist'
+
+// Command-line options:
+//   --ops <n>  number of parses timed per example file (default 100)
+//   --csv      print one CSV row per example instead of aligned text
+const argv = minimist(process.argv.slice(2))
+const NUMBER_OF_OPS = argv.ops || 100
+const CSV_OUTPUT = argv.csv || false
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url))
+
+// Example documents are looked up relative to this script's directory.
+const examplesDir = path.join(
+  __dirname,
+  '../../test/unit/src/LezerLatex/examples'
+)
+
+const strictParser = parser.configure({ strict: true }) // throw exception for invalid documents
+
+if (!fs.existsSync(examplesDir)) {
+  console.error('No examples directory')
+  // Exit with a failure status: a bare process.exit() would report success
+  // (exit code 0) even though nothing was benchmarked.
+  process.exit(1)
+}
+
+/**
+ * Print the sizes of the parser's `states`, `data` and `goto` tables.
+ *
+ * @param {object} parser - object exposing `states`, `data` and `goto`,
+ *   each with a `length`
+ */
+function dumpParserStats(parser) {
+  console.log('Parser size:')
+  const { states, data, goto } = parser
+  const sizes = {
+    states: states.length,
+    data: data.length,
+    goto: goto.length,
+  }
+  console.dir(sizes)
+}
+
+dumpParserStats(strictParser)
+
+// Benchmark every .tex example, visiting the files in sorted order.
+const folder = examplesDir
+const texFiles = fs
+  .readdirSync(folder)
+  .sort()
+  .filter(entry => /\.tex$/.test(entry))
+for (const texFile of texFiles) {
+  // The example name is the file name up to (not including) the first dot.
+  const [name] = /^[^.]*/.exec(texFile)
+  const content = fs.readFileSync(path.join(folder, texFile), 'utf8')
+  benchmark(name, content)
+}
+
+/**
+ * Parse `content` NUMBER_OF_OPS times with the strict parser and print the
+ * mean time per parse, either as a CSV row (name, mean ms, content length)
+ * or as an aligned text line.
+ *
+ * Any exception thrown while parsing or reporting is caught and printed to
+ * stderr next to the example name, so one failing document does not stop
+ * the run.
+ *
+ * @param {string} name - label printed for this example
+ * @param {string} content - document text to parse
+ */
+function benchmark(name, content) {
+  try {
+    let totalMs = 0
+    for (let run = 0; run < NUMBER_OF_OPS; run += 1) {
+      const before = performance.now()
+      strictParser.parse(content)
+      totalMs += performance.now() - before
+    }
+    const meanMs = totalMs / NUMBER_OF_OPS
+    const line = CSV_OUTPUT
+      ? `${name},${meanMs.toFixed(2)},${content.length}`
+      : `${name.padEnd(20)} time to run (ms):\t ${meanMs.toFixed(2)}`
+    console.log(line)
+  } catch (error) {
+    console.error(`${name.padEnd(20)} ${error}`)
+  }
+}
--- a/services/web/scripts/lezer-latex/generate.js
+++ b/services/web/scripts/lezer-latex/generate.js
@@ -0,0 +1,69 @@
+const { buildParserFile } = require('@lezer/generator')
+const { writeFileSync, readFileSync } = require('fs')
+const path = require('path')
+
+// Resolve a path relative to the directory that contains this script.
+const fromScriptDir = relativePath => path.resolve(__dirname, relativePath)
+
+// One entry per grammar: the grammar source file, plus the output paths for
+// the generated parser module and the generated terms module.
+const grammars = [
+  {
+    grammarPath: fromScriptDir(
+      '../../frontend/js/features/source-editor/lezer-latex/latex.grammar'
+    ),
+    parserOutputPath: fromScriptDir(
+      '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
+    ),
+    termsOutputPath: fromScriptDir(
+      '../../frontend/js/features/source-editor/lezer-latex/latex.terms.mjs'
+    ),
+  },
+  {
+    grammarPath: fromScriptDir(
+      '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.grammar'
+    ),
+    parserOutputPath: fromScriptDir(
+      '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.mjs'
+    ),
+    termsOutputPath: fromScriptDir(
+      '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.terms.mjs'
+    ),
+  },
+]
+
+/**
+ * Build one grammar: read the grammar file, generate the parser with
+ * buildParserFile (ES module style), and write the parser and terms sources
+ * to their output paths. Progress is logged with console.info.
+ *
+ * @param {{grammarPath: string, termsOutputPath: string, parserOutputPath: string}} grammar
+ */
+function compile({ grammarPath, termsOutputPath, parserOutputPath }) {
+  console.info(`Compiling ${grammarPath}`)
+
+  const source = readFileSync(grammarPath, 'utf8')
+  console.info(`Loaded grammar from ${grammarPath}`)
+
+  const buildOptions = { fileName: grammarPath, moduleStyle: 'es' }
+  const { parser: parserSource, terms: termsSource } = buildParserFile(
+    source,
+    buildOptions
+  )
+  console.info(`Built parser`)
+
+  writeFileSync(parserOutputPath, parserSource)
+  console.info(`Wrote parser to ${parserOutputPath}`)
+
+  writeFileSync(termsOutputPath, termsSource)
+  console.info(`Wrote terms to ${termsOutputPath}`)
+
+  console.info('Done!')
+}
+
+module.exports = { compile, grammars }
+
+// When executed directly (rather than required), compile every grammar in
+// order. The first failure is printed and the process exits with status 1;
+// otherwise it exits with status 0.
+if (require.main === module) {
+  let exitCode = 0
+  try {
+    for (const grammar of grammars) {
+      compile(grammar)
+    }
+  } catch (err) {
+    console.error(err)
+    exitCode = 1
+  }
+  process.exit(exitCode)
+}
--- a/services/web/scripts/lezer-latex/print-tree.mjs
+++ b/services/web/scripts/lezer-latex/print-tree.mjs
@@ -0,0 +1,215 @@
+// from https://gist.github.com/msteen/e4828fbf25d6efef73576fc43ac479d2
+// https://discuss.codemirror.net/t/whats-the-best-to-test-and-debug-grammars/2542/5
+// MIT License
+//
+// Copyright (c) 2021 Matthijs Steen
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+import { Text } from '@codemirror/state'
+import { Tree, TreeCursor } from '@lezer/common'
+
+/**
+ * Thin wrapper that exposes a plain string through `length`, `chunk` and
+ * `read`, with `lineChunks` fixed to false.
+ */
+class StringInput {
+  constructor(input) {
+    Object.assign(this, { input, lineChunks: false })
+  }
+
+  /** Length of the wrapped string. */
+  get length() {
+    const { input } = this
+    return input.length
+  }
+
+  /** Everything from offset `from` to the end of the string. */
+  chunk(from) {
+    const { input } = this
+    return input.slice(from)
+  }
+
+  /** The part of the string between offsets `from` and `to`. */
+  read(from, to) {
+    const { input } = this
+    return input.slice(from, to)
+  }
+}
+
+// Copy the current `type`, `from` and `to` of a cursor (or node-like object)
+// into a plain object, tagged with the given `isLeaf` flag.
+function cursorNode(cursor, isLeaf = false) {
+  const { type, from, to } = cursor
+  return { type, from, to, isLeaf }
+}
+/**
+ * Walk a tree depth-first with a cursor, invoking callbacks for the nodes
+ * that fall within the range [from, to].
+ *
+ * @param cursor - a TreeCursor, or an object with a `cursor()` method whose
+ *   result is used as the cursor
+ * @param options
+ * @param options.from - start of the range of interest (default -Infinity)
+ * @param options.to - end of the range of interest (default Infinity)
+ * @param options.includeParents - when true, non-anonymous nodes that merely
+ *   overlap the range are reported; otherwise a node must lie fully inside it
+ * @param options.beforeEnter - optional; called with the cursor for a node
+ *   that is about to be reported, before the cursor moves to its first child
+ * @param options.onEnter - called with a `{ type, from, to, isLeaf }`
+ *   snapshot of each reported node; returning `false` stops the traversal.
+ *   It is invoked without a presence check, so it must be supplied.
+ * @param options.onLeave - optional; called with a node snapshot when the
+ *   walk moves on from a node; returning `false` stops the traversal
+ */
+function traverseTree(
+  cursor,
+  {
+    from = -Infinity,
+    to = Infinity,
+    includeParents = false,
+    beforeEnter,
+    onEnter,
+    onLeave,
+  }
+) {
+  // NOTE(review): both branches of the ternary below are identical, so the
+  // `instanceof Tree` test has no effect — confirm whether it can be dropped.
+  if (!(cursor instanceof TreeCursor))
+    cursor = cursor instanceof Tree ? cursor.cursor() : cursor.cursor()
+  for (;;) {
+    let node = cursorNode(cursor)
+    let leave = false
+    // Only nodes that overlap [from, to] are considered (and descended into).
+    if (node.from <= to && node.to >= from) {
+      // Report non-anonymous nodes that are fully inside the range, or that
+      // merely overlap it when includeParents is set.
+      const enter =
+        !node.type.isAnonymous &&
+        (includeParents || (node.from >= from && node.to <= to))
+      if (enter && beforeEnter) beforeEnter(cursor)
+      // Try to descend; a failed firstChild() is treated as "node is a leaf".
+      node.isLeaf = !cursor.firstChild()
+      if (enter) {
+        leave = true
+        if (onEnter(node) === false) return
+      }
+      // The cursor now sits on the first child: restart the outer loop there.
+      if (!node.isLeaf) continue
+    }
+    // Nothing (more) to descend into: advance to the next sibling, climbing
+    // to the parent whenever the current level is exhausted.
+    for (;;) {
+      node = cursorNode(cursor, node.isLeaf)
+      if (leave && onLeave) if (onLeave(node) === false) return
+      leave = cursor.type.isAnonymous
+      node.isLeaf = false
+      if (cursor.nextSibling()) break
+      // No parent left means the whole tree has been walked.
+      if (!cursor.parent()) return
+      leave = true
+    }
+  }
+}
+// True when both ends of `child` lie within [parent.from, parent.to].
+function isChildOf(child, parent) {
+  const { from: lower, to: upper } = parent
+  const inside = pos => pos >= lower && pos <= upper
+  return inside(child.from) && inside(child.to)
+}
+/**
+ * Create `onEnter`/`onLeave` callbacks that sanity-check node ranges during
+ * a traversal, together with the mutable state they update.
+ *
+ * `state.valid` is recomputed on every `onEnter` call. A node is marked
+ * invalid when its range is inverted, when it starts before the end of the
+ * last leaf seen, or when it is not contained in `state.parentNodes[0]`.
+ * Non-leaf nodes are pushed onto the front of `state.parentNodes` before
+ * these checks run and removed again in `onLeave`.
+ *
+ * @param input - a string (wrapped in a StringInput) or an object with a
+ *   `length`
+ * @param options.fullMatch - when true, a non-leaf node entered while
+ *   `state.parentNodes` is empty must span the whole input (default true)
+ * @returns {{state: object, traversal: {onEnter: Function, onLeave: Function}}}
+ */
+function validatorTraversal(input, { fullMatch = true } = {}) {
+  const source = typeof input === 'string' ? new StringInput(input) : input
+  const state = { valid: true, parentNodes: [], lastLeafTo: 0 }
+
+  // True when there is a tracked parent and it does not contain `node`.
+  const escapesParent = node =>
+    state.parentNodes.length > 0 && !isChildOf(node, state.parentNodes[0])
+
+  const traversal = {
+    onEnter(node) {
+      state.valid = true
+      if (!node.isLeaf) state.parentNodes.unshift(node)
+
+      const badRange = node.from > node.to || node.from < state.lastLeafTo
+      if (badRange) {
+        state.valid = false
+        return
+      }
+      if (node.isLeaf) {
+        if (escapesParent(node)) state.valid = false
+        state.lastLeafTo = node.to
+        return
+      }
+      if (state.parentNodes.length > 0) {
+        if (escapesParent(node)) state.valid = false
+      } else if (
+        fullMatch &&
+        (node.from !== 0 || node.to !== source.length)
+      ) {
+        state.valid = false
+      }
+    },
+    onLeave(node) {
+      if (!node.isLeaf) state.parentNodes.shift()
+    },
+  }
+
+  return { state, traversal }
+}
+
+// ANSI foreground colour codes, with a reverse (code -> name) mapping.
+const Color = {}
+for (const [name, code] of [
+  ['Red', 31],
+  ['Green', 32],
+  ['Yellow', 33],
+]) {
+  Color[name] = code
+  Color[code] = name
+}
+
+// Wrap `value` in the escape sequence for the given colour code, followed by
+// the sequence that resets the foreground colour.
+function colorize(value, color) {
+  const open = `\u001b[${color}m`
+  const close = '\u001b[39m'
+  return open + String(value) + close
+}
+
+/**
+ * Render a syntax tree as an indented, colourised text outline.
+ *
+ * Each reported node becomes one line: its type name (prefixed with a red
+ * `ERROR` when the node is an error node or fails the range validation),
+ * its position as `line:column` (or a `[start..end]` pair), and, for
+ * non-empty leaves, the JSON-quoted source text it covers.
+ *
+ * @param cursor - tree or cursor handed to traverseTree
+ * @param input - source text, as a string or an object with `read`/`length`
+ * @param options.from - forwarded to traverseTree
+ * @param options.to - forwarded to traverseTree
+ * @param options.start - offset added to node positions before they are
+ *   converted to line:column (default 0)
+ * @param options.includeParents - forwarded to traverseTree
+ * @returns {string} the rendered outline
+ */
+function printTree(
+  cursor,
+  input,
+  { from, to, start = 0, includeParents } = {}
+) {
+  const source = typeof input === 'string' ? new StringInput(input) : input
+  const doc = Text.of(source.read(0, source.length).split('\n'))
+  const checker = validatorTraversal(source)
+  const indents = []
+  let rendered = ''
+  let siblingFollows = false
+
+  // Yellow line:column label for an offset in the parsed input.
+  const position = offset => colorize(locAt(doc, start + offset), Color.Yellow)
+
+  traverseTree(cursor, {
+    from,
+    to,
+    includeParents,
+    beforeEnter(cursor) {
+      // Probe for a following sibling, stepping back if the probe moved.
+      siblingFollows = cursor.nextSibling() && cursor.prevSibling()
+    },
+    onEnter(node) {
+      checker.traversal.onEnter(node)
+      const isTop = rendered === ''
+      if (!isTop || node.from > 0) {
+        rendered += (isTop ? '' : '\n') + indents.join('')
+        if (siblingFollows) {
+          rendered += ' ├─ '
+          indents.push(' │  ')
+        } else {
+          rendered += ' └─ '
+          indents.push('    ')
+        }
+      }
+      const flagged = node.type.isError || !checker.state.valid
+      const label = flagged
+        ? colorize('ERROR ' + node.type.name, Color.Red)
+        : node.type.name
+      const hasRange = node.from !== node.to
+      const span = hasRange
+        ? '[' + position(node.from) + '..' + position(node.to) + ']'
+        : position(node.from)
+      rendered += label + ' ' + span
+      if (hasRange && node.isLeaf) {
+        rendered +=
+          ': ' +
+          colorize(JSON.stringify(source.read(node.from, node.to)), Color.Green)
+      }
+    },
+    onLeave(node) {
+      checker.traversal.onLeave(node)
+      indents.pop()
+    },
+  })
+  return rendered
+}
+
+// Format `pos` as "<line number>:<offset within that line>", using the line
+// that `text.lineAt(pos)` returns.
+function locAt(text, pos) {
+  const { number, from: lineStart } = text.lineAt(pos)
+  return `${number}:${pos - lineStart}`
+}
+
+/**
+ * Render the tree with printTree and emit the result through console.warn.
+ *
+ * @param tree - tree or cursor to print
+ * @param input - source text the tree was parsed from
+ * @param options - passed through to printTree
+ */
+export function logTree(tree, input, options) {
+  const rendered = printTree(tree, input, options)
+  console.warn(rendered)
+}
--- a/services/web/scripts/lezer-latex/random.mjs
+++ b/services/web/scripts/lezer-latex/random.mjs
@@ -0,0 +1,19 @@
+// Super quick and dirty LCG PRNG
+
+const m = 0xffffffff
+const a = 16807
+const c = 0
+
+// Generator state. It must stay within [1, m - 1]: because c === 0, a state
+// of 0 maps to 0 forever and random() would then only ever return 0.
+let X = 1 + Math.floor(Math.random() * (m - 1))
+
+/**
+ * Reset the generator so that the following random() calls are reproducible.
+ * Should probably be a large-ish number.
+ *
+ * The seed is reduced to an unsigned 32-bit value modulo m; a reduced value
+ * of 0 is replaced by 1 so the generator cannot get stuck at zero.
+ *
+ * @param {number} i - non-negative seed
+ * @throws {Error} when `i` is negative
+ */
+export function seed(i) {
+  if (i < 0) {
+    throw new Error('Seed must be a positive integer')
+  }
+  // `i & m` would yield a *signed* 32-bit value, i.e. a negative state (and
+  // negative random() results) for seeds >= 2 ** 31; `>>> 0` keeps it unsigned.
+  const reduced = (i >>> 0) % m
+  X = reduced === 0 ? 1 : reduced
+}
+
+/**
+ * Advance the generator one step and return the new state scaled by m.
+ *
+ * @returns {number} a value strictly between 0 and 1
+ */
+export function random() {
+  X = (a * X + c) % m
+  return X / m
+}
--- a/services/web/scripts/lezer-latex/run.mjs
+++ b/services/web/scripts/lezer-latex/run.mjs
@@ -0,0 +1,79 @@
+import { readFileSync } from 'node:fs'
+import { logTree } from './print-tree.mjs'
+import { parser as LaTeXParser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
+import { parser as BibTeXParser } from '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.mjs'
+
+// Runs the lezer-latex or lezer-bibtex parser on a supplied file, and prints the resulting
+// parse tree to stdout
+//
+// show parse tree:     lezer-latex-run.js test/unit/src/LezerLatex/examples/amsmath.tex
+//                      lezer-latex-run.js test/unit/src/LezerLatex/examples/overleaf.bib
+// show error summary:  lezer-latex-run.js coverage test/unit/src/LezerLatex/examples/amsmath.tex
+
+// File parsed when no file names are given on the command line.
+const DEFAULT_FILES = ['test/unit/src/LezerLatex/examples/demo.tex']
+
+/**
+ * Split the command-line arguments into the optional leading `coverage`
+ * keyword and the list of files to parse.
+ *
+ * The default file is applied after the keyword has been removed, so that
+ * `coverage` on its own still parses the default file instead of leaving an
+ * empty list (which made the summary report NaN ms/100KB).
+ *
+ * @param {string[]} args - arguments after the script name
+ * @returns {{coverage: boolean, files: string[]}}
+ */
+function parseCliArgs(args) {
+  // count errors
+  const coverage = args[0] === 'coverage'
+  const named = coverage ? args.slice(1) : [...args]
+  return { coverage, files: named.length ? named : [...DEFAULT_FILES] }
+}
+
+const { coverage, files } = parseCliArgs(process.argv.slice(2))
+
+// Write `output` to stdout, but only when running in coverage mode.
+function reportErrorCounts(output) {
+  if (!coverage) return
+  process.stdout.write(output)
+}
+
+/**
+ * Parse one file and collect statistics about the resulting tree.
+ *
+ * Files whose name ends in `.bib` are parsed with the BibTeX parser, all
+ * others with the LaTeX parser. Unless coverage mode is on, the parse tree
+ * is also printed with logTree.
+ *
+ * @param {string} filename - path of the file to parse
+ * @returns {{nodeCount: number, errorCount: number, timeTaken: number, bytes: number}}
+ *   node and error-node counts, parse time in seconds, and file size in bytes
+ */
+function parseFile(filename) {
+  const buffer = readFileSync(filename)
+  const text = buffer.toString()
+  const t0 = process.hrtime()
+  const parser = filename.endsWith('.bib') ? BibTeXParser : LaTeXParser
+  const tree = parser.parse(text)
+  const dt = process.hrtime(t0)
+  // hrtime returns [seconds, nanoseconds]
+  const timeTaken = dt[0] + dt[1] * 1e-9
+  let errorCount = 0
+  let nodeCount = 0
+  tree.iterate({
+    enter: syntaxNodeRef => {
+      nodeCount++
+      if (syntaxNodeRef.type.isError) {
+        errorCount++
+      }
+    },
+  })
+  if (!coverage) logTree(tree, text)
+  // Report the on-disk size: text.length counts UTF-16 code units, which
+  // under-reports files that contain non-ASCII characters.
+  return { nodeCount, errorCount, timeTaken, bytes: buffer.length }
+}
+
+// Parse every requested file, print one statistics row per file (coverage
+// mode only) and accumulate totals for the final summary line.
+const totals = { errors: 0, seconds: 0, bytes: 0 }
+for (const file of files) {
+  const stats = parseFile(file)
+  const errorRate = Math.round((100 * stats.errorCount) / stats.nodeCount)
+  totals.errors += stats.errorCount
+  totals.seconds += stats.timeTaken
+  totals.bytes += stats.bytes
+
+  const columns = [
+    `${stats.errorCount} errors`.padStart(12),
+    `${stats.nodeCount} nodes`.padStart(12),
+    `(${errorRate}%)`.padStart(6),
+    `${(1000 * stats.timeTaken).toFixed(1)} ms`.padStart(8),
+    `${(stats.bytes / 1024).toFixed(1)} KB`.padStart(8),
+    ` ${file}\n`,
+  ]
+  reportErrorCounts(columns.join(''))
+}
+
+const timeInMilliseconds = 1000 * totals.seconds
+const hundredKBs = totals.bytes / (100 * 1024)
+const msPerHundredKB = timeInMilliseconds / hundredKBs
+
+reportErrorCounts(
+  `\ntotal errors ${totals.errors}, performance ${msPerHundredKB.toFixed(
+    1
+  )} ms/100KB \n`
+)
+
+// return non-zero exit status for tests
+if (totals.errors > 0) {
+  process.exit(1)
+}
--- a/services/web/scripts/lezer-latex/test-incremental-parser.mjs
+++ b/services/web/scripts/lezer-latex/test-incremental-parser.mjs
@@ -0,0 +1,163 @@
+import { parser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
+
+import * as fs from 'node:fs'
+import * as path from 'node:path'
+import { fileURLToPath } from 'node:url'
+import { TreeFragment } from '@lezer/common'
+import minimist from 'minimist'
+import { seed, random } from './random.mjs'
+
+// Command-line options:
+//   --ops <n>   number of edits per scenario (default 1000)
+//   --csv       print CSV rows instead of objects
+//   --seed <n>  seed for the PRNG used by the random scenarios
+const argv = minimist(process.argv.slice(2))
+const { ops, csv, seed: SEED } = argv
+const NUMBER_OF_OPS = ops || 1000
+const CSV_OUTPUT = csv || false
+
+// Only a truthy seed is applied; otherwise the PRNG keeps its initial state.
+if (SEED) seed(SEED)
+
+const thisFile = fileURLToPath(import.meta.url)
+const __dirname = path.dirname(thisFile)
+
+const examplesDir = path.join(
+  __dirname,
+  '../../test/unit/src/LezerLatex/examples'
+)
+
+// Run the scenarios against every .tex example, in sorted file order.
+const folder = examplesDir
+const texFiles = fs
+  .readdirSync(folder)
+  .sort()
+  .filter(entry => /\.tex$/.test(entry))
+for (const texFile of texFiles) {
+  // The example name is the file name up to (not including) the first dot.
+  const [name] = /^[^.]*/.exec(texFile)
+  const content = fs.readFileSync(path.join(folder, texFile), 'utf8')
+  runPerformanceTests(name, content)
+}
+
+/**
+ * Run every edit scenario against one document and print the average
+ * incremental parse time of each, as a CSV row or as an object.
+ *
+ * The scenarios run in a fixed order (end, beginning and middle insertions,
+ * then random deletions, middle deletions and random insertions); the order
+ * matters because the random scenarios draw from the shared PRNG.
+ *
+ * @param {string} name - label for the document
+ * @param {string} content - document text
+ */
+function runPerformanceTests(name, content) {
+  // The text "typed" by the insertion scenarios: the document's own prefix.
+  const typedText = content.substring(0, NUMBER_OF_OPS)
+  const midpoint = Math.floor(content.length / 2)
+
+  const insertAtEnd = writeTextAt(content, content.length, typedText).average
+  const insertAtBeginning = writeTextAt(content, 0, typedText).average
+  const insertAtMiddle = writeTextAt(content, midpoint, typedText).average
+  const randomDelete = randomDeletions(content, NUMBER_OF_OPS).average
+  const middleDelete = deletionsFromMiddle(content, NUMBER_OF_OPS).average
+  const randomInsert = randomInsertions(content, NUMBER_OF_OPS).average
+  const docLength = content.length
+
+  if (CSV_OUTPUT) {
+    const row = [
+      name,
+      insertAtBeginning,
+      insertAtMiddle,
+      insertAtEnd,
+      randomInsert,
+      randomDelete,
+      middleDelete,
+      docLength,
+    ]
+    console.log(row.join(','))
+    return
+  }
+
+  console.log({
+    name,
+    insertAtEnd,
+    insertAtBeginning,
+    insertAtMiddle,
+    randomDelete,
+    middleDelete,
+    randomInsert,
+    docLength,
+  })
+}
+
+/**
+ * Apply `changes` successive edits to a document and time the incremental
+ * re-parse after each one.
+ *
+ * @param {string} document - starting text
+ * @param {number} changes - number of edits to apply
+ * @param {(currentDoc: string, index: number) => {text: string, range: object}} changeFn
+ *   produces the edited text and the changed range for edit number `index`
+ * @returns {{total: number, average: number, ops: number, fragments: number}}
+ *   total and per-edit time in milliseconds, the edit count, and the number
+ *   of tree fragments held after the last edit
+ */
+function timedChanges(document, changes, changeFn) {
+  // Untimed initial parse: supplies the fragments the first edit reuses.
+  let fragments = TreeFragment.addTree(parser.parse(document))
+  let currentDoc = document
+  let totalParseTime = 0
+
+  for (let step = 0; step < changes; step += 1) {
+    const { text, range } = changeFn(currentDoc, step)
+    currentDoc = text
+
+    // Timed section: fragment update, incremental parse, fragment refresh.
+    const startedAt = performance.now()
+    fragments = TreeFragment.applyChanges(fragments, [range])
+    const reparsed = parser.parse(currentDoc, fragments)
+    fragments = TreeFragment.addTree(reparsed, fragments)
+    totalParseTime += performance.now() - startedAt
+  }
+
+  return {
+    total: totalParseTime,
+    average: totalParseTime / changes,
+    ops: changes,
+    fragments: fragments.length,
+  }
+}
+
+// Insert `text` one character at a time starting at `position`, re-parsing
+// (and timing) after every single-character insertion.
+function writeTextAt(document, position, text) {
+  const typeNextChar = (currentDoc, index) =>
+    insertAt(currentDoc, position + index, text[index])
+  return timedChanges(document, text.length, typeNextChar)
+}
+
+// Time `num` insertions of the character 'a', each at a position drawn from
+// the PRNG within the current document.
+function randomInsertions(document, num) {
+  const insertSomewhere = currentDoc => {
+    const position = Math.floor(random() * currentDoc.length)
+    return insertAt(currentDoc, position, 'a')
+  }
+  return timedChanges(document, num, insertSomewhere)
+}
+
+/**
+ * Time single-character deletions at positions drawn from the PRNG.
+ *
+ * The number of deletions is capped at the document length: once the
+ * document is empty there is nothing left to delete, and further edits
+ * would report a changed range that lies beyond the end of the document.
+ *
+ * @param {string} document - starting text
+ * @param {number} num - requested number of deletions
+ * @returns the timing summary produced by timedChanges
+ */
+function randomDeletions(document, num) {
+  const deletions = Math.min(num, document.length)
+  return timedChanges(document, deletions, currentDoc =>
+    deleteAt(currentDoc, Math.floor(random() * currentDoc.length), 1)
+  )
+}
+
+// Time single-character deletions that start at the middle of the document
+// and move one position towards the beginning with every edit. The number of
+// deletions is limited to one less than the midpoint offset.
+function deletionsFromMiddle(document, num) {
+  const middle = Math.floor(document.length / 2)
+  const count = Math.min(num, middle - 1)
+  const deleteBeforeMiddle = (currentDoc, index) =>
+    deleteAt(currentDoc, middle - index, 1)
+  return timedChanges(document, count, deleteBeforeMiddle)
+}
+
+/**
+ * Insert `text` into `document` at `position`.
+ *
+ * @returns {{text: string, range: {fromA: number, toA: number, fromB: number, toB: number}}}
+ *   the edited text and the changed range: empty in the old document (A),
+ *   and covering the inserted text in the new document (B)
+ */
+function insertAt(document, position, text) {
+  const before = document.substring(0, position)
+  const after = document.substring(position)
+  const edited = `${before}${text}${after}`
+  const range = {
+    fromA: position,
+    toA: position,
+    fromB: position,
+    toB: position + text.length,
+  }
+  return { text: edited, range }
+}
+
+/**
+ * Remove `length` characters (default 1) from `document`, starting at
+ * `position`.
+ *
+ * @returns {{text: string, range: {fromA: number, toA: number, fromB: number, toB: number}}}
+ *   the edited text and the changed range: covering the removed span in the
+ *   old document (A), and empty in the new document (B)
+ */
+function deleteAt(document, position, length = 1) {
+  const removedEnd = position + length
+  const kept = document.substring(0, position) + document.substring(removedEnd)
+  return {
+    text: kept,
+    range: {
+      fromA: position,
+      toA: removedEnd,
+      fromB: position,
+      toB: position,
+    },
+  }
+}