first commit

This commit is contained in:
2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
import { parser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
import * as fs from 'node:fs'
import * as path from 'node:path'
import { fileURLToPath } from 'node:url'
import minimist from 'minimist'
// Command-line options: --ops <n> sets how many times each example is parsed,
// --csv switches the report to comma-separated output.
const argv = minimist(process.argv.slice(2))
const NUMBER_OF_OPS = argv.ops || 100
const CSV_OUTPUT = argv.csv || false
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const examplesDir = path.join(
  __dirname,
  '../../test/unit/src/LezerLatex/examples'
)
const strictParser = parser.configure({ strict: true }) // throw exception for invalid documents
if (!fs.existsSync(examplesDir)) {
  console.error('No examples directory')
  // Exit with a failure status so that callers (shell scripts, CI) can tell
  // the benchmark did not run; a bare process.exit() would report success.
  process.exit(1)
}
/**
 * Prints the sizes of the parser's tables to the console.
 *
 * @param parser - object exposing `states`, `data` and `goto`, each of which
 *   has a `length`
 */
function dumpParserStats(parser) {
  console.log('Parser size:')
  const tableNames = ['states', 'data', 'goto']
  const sizes = Object.fromEntries(
    tableNames.map(table => [table, parser[table].length])
  )
  console.dir(sizes)
}
// Report the parser table sizes once, then benchmark every .tex example in
// sorted file-name order.
dumpParserStats(strictParser)
const texFiles = fs
  .readdirSync(examplesDir)
  .sort()
  .filter(entry => entry.endsWith('.tex'))
texFiles.forEach(file => {
  // The benchmark name is the part of the file name before the first dot
  const [name] = file.match(/^[^.]*/)
  const content = fs.readFileSync(path.join(examplesDir, file), 'utf8')
  benchmark(name, content)
})
/**
 * Parses `content` NUMBER_OF_OPS times with the strict parser and prints the
 * mean time per parse in milliseconds.
 *
 * With CSV_OUTPUT set, the line is `name,mean,content.length`; otherwise a
 * padded human-readable line is printed. Any error thrown while parsing is
 * reported on stderr next to the example name instead of being rethrown.
 *
 * @param name - label for the example
 * @param content - document text to parse
 */
function benchmark(name, content) {
  const label = name.padEnd(20)
  let elapsed = 0
  try {
    for (let run = 0; run < NUMBER_OF_OPS; run += 1) {
      const before = performance.now()
      strictParser.parse(content)
      const after = performance.now()
      elapsed += after - before
    }
    const mean = (elapsed / NUMBER_OF_OPS).toFixed(2)
    const line = CSV_OUTPUT
      ? `${name},${mean},${content.length}`
      : `${label} time to run (ms):\t ${mean}`
    console.log(line)
  } catch (error) {
    console.error(`${label} ${error}`)
  }
}

View File

@@ -0,0 +1,69 @@
const { buildParserFile } = require('@lezer/generator')
const { writeFileSync, readFileSync } = require('fs')
const path = require('path')
// Grammar sources and the generated parser/terms modules. The paths are
// written relative to this script's directory and resolved to absolute paths
// by the final map().
const grammars = [
  {
    grammarPath:
      '../../frontend/js/features/source-editor/lezer-latex/latex.grammar',
    parserOutputPath:
      '../../frontend/js/features/source-editor/lezer-latex/latex.mjs',
    termsOutputPath:
      '../../frontend/js/features/source-editor/lezer-latex/latex.terms.mjs',
  },
  {
    grammarPath:
      '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.grammar',
    parserOutputPath:
      '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.mjs',
    termsOutputPath:
      '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.terms.mjs',
  },
].map(({ grammarPath, parserOutputPath, termsOutputPath }) => ({
  grammarPath: path.resolve(__dirname, grammarPath),
  parserOutputPath: path.resolve(__dirname, parserOutputPath),
  termsOutputPath: path.resolve(__dirname, termsOutputPath),
}))
/**
 * Builds one grammar with `buildParserFile` and writes the generated parser
 * and terms modules to disk, logging each step.
 *
 * @param grammar - entry with `grammarPath`, `parserOutputPath` and
 *   `termsOutputPath`
 */
function compile(grammar) {
  const { grammarPath: sourcePath } = grammar
  const { parserOutputPath: parserTarget, termsOutputPath: termsTarget } =
    grammar

  console.info(`Compiling ${sourcePath}`)
  const grammarText = readFileSync(sourcePath, 'utf8')
  console.info(`Loaded grammar from ${sourcePath}`)

  // Generated modules use ES module syntax
  const built = buildParserFile(grammarText, {
    fileName: sourcePath,
    moduleStyle: 'es',
  })
  console.info(`Built parser`)

  writeFileSync(parserTarget, built.parser)
  console.info(`Wrote parser to ${parserTarget}`)
  writeFileSync(termsTarget, built.terms)
  console.info(`Wrote terms to ${termsTarget}`)
  console.info('Done!')
}
// Expose the compile step and the grammar list to other scripts
module.exports = { compile, grammars }
// When run directly, build every grammar and report the outcome through the
// process exit status (0 on success, 1 if any build throws).
if (require.main === module) {
  let exitCode = 0
  try {
    for (const grammar of grammars) {
      compile(grammar)
    }
  } catch (err) {
    console.error(err)
    exitCode = 1
  }
  process.exit(exitCode)
}

View File

@@ -0,0 +1,215 @@
// from https://gist.github.com/msteen/e4828fbf25d6efef73576fc43ac479d2
// https://discuss.codemirror.net/t/whats-the-best-to-test-and-debug-grammars/2542/5
// MIT License
//
// Copyright (c) 2021 Matthijs Steen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
import { Text } from '@codemirror/state'
import { Tree, TreeCursor } from '@lezer/common'
/**
 * Thin wrapper giving a plain string the `length` / `chunk` / `read`
 * interface used by the tree printer.
 */
class StringInput {
  /**
   * @param input - the string to wrap
   */
  constructor(input) {
    this.input = input
    this.lineChunks = false
  }

  /** Length of the wrapped string. */
  get length() {
    const { input } = this
    return input.length
  }

  /** Everything from offset `from` to the end of the string. */
  chunk(from) {
    return this.read(from)
  }

  /** The slice of the string between offsets `from` and `to`. */
  read(from, to) {
    const { input } = this
    return input.slice(from, to)
  }
}
/**
 * Takes a plain snapshot of the `type`, `from` and `to` of a cursor (or node),
 * tagged with an `isLeaf` flag.
 *
 * @param cursor - object with `type`, `from` and `to`
 * @param isLeaf - value for the snapshot's `isLeaf` field (default false)
 * @returns `{ type, from, to, isLeaf }`
 */
function cursorNode(cursor, isLeaf = false) {
  const { type, from, to } = cursor
  const snapshot = { type, from, to, isLeaf }
  return snapshot
}
/**
 * Walks a syntax tree depth-first with a cursor and reports nodes to the
 * supplied callbacks.
 *
 * @param cursor - a TreeCursor, or any object whose `cursor()` method returns
 *   one (e.g. a Tree)
 * @param options
 * @param options.from - start of the range of interest (default -Infinity)
 * @param options.to - end of the range of interest (default Infinity)
 * @param options.includeParents - when true, nodes that only overlap the range
 *   are reported too; otherwise a node must lie entirely inside it
 * @param options.beforeEnter - optional; called with the cursor just before a
 *   reported node's children are visited
 * @param options.onEnter - called with a `{ type, from, to, isLeaf }` snapshot
 *   for every reported node; returning `false` stops the walk. It is invoked
 *   without an existence check, so it must be supplied.
 * @param options.onLeave - optional; called with a snapshot when a node is
 *   left; returning `false` stops the walk
 */
function traverseTree(
cursor,
{
from = -Infinity,
to = Infinity,
includeParents = false,
beforeEnter,
onEnter,
onLeave,
}
) {
// Anything that is not already a TreeCursor is asked for one.
// NOTE(review): both branches of the ternary are the same call, so the
// `instanceof Tree` test currently has no effect.
if (!(cursor instanceof TreeCursor))
cursor = cursor instanceof Tree ? cursor.cursor() : cursor.cursor()
for (;;) {
let node = cursorNode(cursor)
let leave = false
// Only nodes that overlap [from, to] are considered (and descended into)
if (node.from <= to && node.to >= from) {
// Report named nodes only; without includeParents the node must also be
// fully contained in the range
const enter =
!node.type.isAnonymous &&
(includeParents || (node.from >= from && node.to <= to))
if (enter && beforeEnter) beforeEnter(cursor)
// Try to step into the first child; a node without one is a leaf
node.isLeaf = !cursor.firstChild()
if (enter) {
leave = true
if (onEnter(node) === false) return
}
// The cursor now sits on the first child: handle it on the next iteration
if (!node.isLeaf) continue
}
// Leave the current node, then move to the next sibling, climbing to the
// parent whenever there is none; the walk ends when there is no parent left.
for (;;) {
node = cursorNode(cursor, node.isLeaf)
if (leave && onLeave) if (onLeave(node) === false) return
// NOTE(review): this value is overwritten on both paths below (reset at the
// top of the outer loop after `break`, set to true after `parent()`).
leave = cursor.type.isAnonymous
node.isLeaf = false
if (cursor.nextSibling()) break
if (!cursor.parent()) return
// After climbing, the parent is always reported to onLeave, whether or not
// onEnter ran for it.
leave = true
}
}
}
/**
 * Tells whether both ends of `child` lie inside the closed range
 * `[parent.from, parent.to]`.
 *
 * @param child - object with numeric `from` and `to`
 * @param parent - object with numeric `from` and `to`
 * @returns true when the child range is contained in the parent range
 */
function isChildOf(child, parent) {
  const insideParent = pos => pos >= parent.from && pos <= parent.to
  return insideParent(child.from) && insideParent(child.to)
}
/**
 * Creates a pair of traversal callbacks that check, node by node, that the
 * reported ranges are well-formed.
 *
 * After each `onEnter`, `state.valid` tells whether the node just entered
 * passed the checks:
 * - its range is not inverted and does not start before the end of the last
 *   leaf seen;
 * - it lies inside the innermost open parent, if there is one;
 * - with `fullMatch`, a non-leaf node entered with no open parent spans the
 *   whole input.
 *
 * NOTE(review): a non-leaf node is pushed onto `parentNodes` before the
 * containment check, so it is compared against itself and the `fullMatch`
 * branch is not reached for it. This mirrors the original logic exactly.
 *
 * @param input - a string, or an object with a `length`
 * @param options.fullMatch - see above (default true)
 * @returns `{ state, traversal: { onEnter, onLeave } }`
 */
function validatorTraversal(input, { fullMatch = true } = {}) {
  const source = typeof input === 'string' ? new StringInput(input) : input
  const state = {
    valid: true,
    parentNodes: [],
    lastLeafTo: 0,
  }

  const onEnter = node => {
    state.valid = true
    if (!node.isLeaf) state.parentNodes.unshift(node)

    // Inverted range, or a start before the previous leaf's end
    if (node.from > node.to || node.from < state.lastLeafTo) {
      state.valid = false
      return
    }

    if (state.parentNodes.length) {
      state.valid = isChildOf(node, state.parentNodes[0])
    } else if (!node.isLeaf && fullMatch) {
      state.valid = node.from === 0 && node.to === source.length
    }

    if (node.isLeaf) state.lastLeafTo = node.to
  }

  const onLeave = node => {
    if (!node.isLeaf) state.parentNodes.shift()
  }

  return { state, traversal: { onEnter, onLeave } }
}
// Colour codes used by colorize(), mapped both ways: name -> code and
// code -> name.
const Color = {}
for (const [name, code] of [
  ['Red', 31],
  ['Green', 32],
  ['Yellow', 33],
]) {
  Color[name] = code
  Color[code] = name
}
/**
 * Wraps `value` in terminal escape sequences: `ESC[<color>m` before it and
 * `ESC[39m` after it.
 *
 * @param value - anything; converted with String()
 * @param color - numeric colour code, e.g. one of the `Color` values
 * @returns the decorated string
 */
function colorize(value, color) {
  const text = String(value)
  return `\u001b[${color}m${text}\u001b[39m`
}
/**
 * Renders a syntax tree as an indented, colourised text outline.
 *
 * Each reported node becomes one line holding its type name, its location
 * (`line:column`, or `[start..end]` when it covers a range) and, for leaves
 * with a range, the JSON-quoted source text. Error nodes and nodes rejected by
 * the validator are prefixed with `ERROR` and shown in red.
 *
 * @param cursor - tree or cursor, passed through to traverseTree
 * @param input - the parsed source, as a string or an object with
 *   `read(from, to)` and `length`
 * @param options.from - passed through to traverseTree
 * @param options.to - passed through to traverseTree
 * @param options.start - offset added to node positions before they are turned
 *   into line/column locations (default 0)
 * @param options.includeParents - passed through to traverseTree
 * @returns the rendered outline
 */
function printTree(
  cursor,
  input,
  { from, to, start = 0, includeParents } = {}
) {
  const inp = typeof input === 'string' ? new StringInput(input) : input
  const text = Text.of(inp.read(0, inp.length).split('\n'))
  const validator = validatorTraversal(inp)

  const prefixes = []
  let output = ''
  let hasNextSibling = false

  // Coloured line:column location of a node offset
  const location = offset =>
    colorize(locAt(text, start + offset), Color.Yellow)

  traverseTree(cursor, {
    from,
    to,
    includeParents,
    beforeEnter(cursor) {
      // Peek at the next sibling and step back, to pick the branch glyph
      hasNextSibling = cursor.nextSibling() && cursor.prevSibling()
    },
    onEnter(node) {
      validator.traversal.onEnter(node)

      const isTop = output === ''
      if (!isTop || node.from > 0) {
        const lineBreak = isTop ? '' : '\n'
        output += lineBreak + prefixes.join('')
        output += hasNextSibling ? ' ├─ ' : ' └─ '
        prefixes.push(hasNextSibling ? ' │ ' : ' ')
      }

      const flagged = node.type.isError || !validator.state.valid
      const label = flagged
        ? colorize(`ERROR ${node.type.name}`, Color.Red)
        : node.type.name

      const hasRange = node.from !== node.to
      const where = hasRange
        ? `[${location(node.from)}..${location(node.to)}]`
        : location(node.from)
      output += `${label} ${where}`

      if (hasRange && node.isLeaf) {
        const quoted = JSON.stringify(inp.read(node.from, node.to))
        output += `: ${colorize(quoted, Color.Green)}`
      }
    },
    onLeave(node) {
      validator.traversal.onLeave(node)
      prefixes.pop()
    },
  })

  return output
}
/**
 * Formats a document offset as `line:column`.
 *
 * @param text - object whose `lineAt(pos)` returns `{ number, from }` for the
 *   line containing `pos`
 * @param pos - offset into the document
 * @returns the line number, a colon, and the offset of `pos` within that line
 */
function locAt(text, pos) {
  const { number, from: lineStart } = text.lineAt(pos)
  const column = pos - lineStart
  return `${number}:${column}`
}
/**
 * Renders a tree with printTree and writes the result via console.warn.
 *
 * @param tree - tree or cursor to print
 * @param input - the source that was parsed
 * @param options - forwarded unchanged to printTree
 */
export function logTree(tree, input, options) {
  const rendered = printTree(tree, input, options)
  console.warn(rendered)
}

View File

@@ -0,0 +1,19 @@
// Super quick and dirty LCG PRNG: X' = (a * X + c) % m
const m = 0xffffffff
const a = 16807
const c = 0
// Because c is 0, a state of 0 would stay 0 forever (a * 0 % m === 0), so the
// initial state is drawn from [1, m - 1].
let X = 1 + Math.floor(Math.random() * (m - 1))

/**
 * Re-seeds the generator so that subsequent random() calls are reproducible.
 *
 * @param i - positive integer seed; should probably be a large-ish number
 * @throws {Error} if `i` is not a positive integer
 */
export function seed(i) {
  if (!Number.isInteger(i) || i <= 0) {
    throw new Error('Seed must be a positive integer')
  }
  // Reduce with % rather than `i & m`: the bitwise form yields a signed 32-bit
  // value, which goes negative for seeds >= 2 ** 31 and makes random() return
  // negative numbers. Multiples of m reduce to the stuck state 0, so they are
  // mapped to 1.
  X = i % m || 1
}

/**
 * Advances the generator by one step.
 *
 * @returns the new state divided by m, a number in [0, 1)
 */
export function random() {
  X = (a * X + c) % m
  return X / m
}

View File

@@ -0,0 +1,79 @@
import { readFileSync } from 'node:fs'
import { logTree } from './print-tree.mjs'
import { parser as LaTeXParser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
import { parser as BibTeXParser } from '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.mjs'
// Runs the lezer-latex or lezer-bibtex parser on a supplied file, and prints the resulting
// parse tree to stdout
//
// show parse tree: lezer-latex-run.js test/unit/src/LezerLatex/examples/amsmath.tex
// lezer-latex-run.js test/unit/src/LezerLatex/examples/overleaf.bib
// show error summary: lezer-latex-run.js coverage test/unit/src/LezerLatex/examples/amsmath.tex
// Files named on the command line, falling back to the demo document
const cliArgs = process.argv.slice(2)
let files = cliArgs.length
  ? cliArgs
  : ['test/unit/src/LezerLatex/examples/demo.tex']
// A leading "coverage" argument switches from printing parse trees to
// printing an error summary (count errors); it is not itself a file name.
let coverage = files[0] === 'coverage'
if (coverage) {
  files.shift()
}
/**
 * Writes `output` to stdout, but only when running in coverage mode.
 *
 * @param output - text to write
 */
function reportErrorCounts(output) {
  if (!coverage) return
  process.stdout.write(output)
}
/**
 * Parses one file and gathers statistics about the resulting tree.
 *
 * Files ending in `.bib` go through the BibTeX parser, everything else through
 * the LaTeX parser. Outside coverage mode the parse tree is also printed.
 *
 * @param filename - path of the file to parse
 * @returns `{ nodeCount, errorCount, timeTaken, bytes }`, where `timeTaken` is
 *   the parse time in seconds and `bytes` is the size of the file contents
 */
function parseFile(filename) {
  const contents = readFileSync(filename)
  const text = contents.toString()
  const parser = filename.endsWith('.bib') ? BibTeXParser : LaTeXParser

  // Time only the parse itself
  const t0 = process.hrtime()
  const tree = parser.parse(text)
  const dt = process.hrtime(t0)
  const timeTaken = dt[0] + dt[1] * 1e-9

  let errorCount = 0
  let nodeCount = 0
  tree.iterate({
    enter: syntaxNodeRef => {
      nodeCount++
      if (syntaxNodeRef.type.isError) {
        errorCount++
      }
    },
  })

  if (!coverage) logTree(tree, text)

  // Report the real byte size: text.length counts UTF-16 code units, which
  // under-reports documents containing non-ASCII characters.
  return { nodeCount, errorCount, timeTaken, bytes: contents.length }
}
// Parse every requested file, printing one summary row per file (coverage
// mode only) and accumulating the totals for the final line.
let totalErrors = 0
let totalTime = 0
let totalBytes = 0
for (const file of files) {
  const stats = parseFile(file)
  const errorRate = Math.round((100 * stats.errorCount) / stats.nodeCount)
  totalErrors += stats.errorCount
  totalTime += stats.timeTaken
  totalBytes += stats.bytes
  const columns = [
    `${stats.errorCount} errors`.padStart(12),
    `${stats.nodeCount} nodes`.padStart(12),
    `(${errorRate}%)`.padStart(6),
    `${(1000 * stats.timeTaken).toFixed(1)} ms`.padStart(8),
    `${(stats.bytes / 1024).toFixed(1)} KB`.padStart(8),
    ` ${file}\n`,
  ]
  reportErrorCounts(columns.join(''))
}
const timeInMilliseconds = 1000 * totalTime
const hundredKBs = totalBytes / (100 * 1024)
const msPerHundredKB = (timeInMilliseconds / hundredKBs).toFixed(1)
reportErrorCounts(
  `\ntotal errors ${totalErrors}, performance ${msPerHundredKB} ms/100KB \n`
)
if (totalErrors > 0) {
  process.exit(1) // return non-zero exit status for tests
}

View File

@@ -0,0 +1,163 @@
import { parser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
import * as fs from 'node:fs'
import * as path from 'node:path'
import { fileURLToPath } from 'node:url'
import { TreeFragment } from '@lezer/common'
import minimist from 'minimist'
import { seed, random } from './random.mjs'
// Command-line options: --ops <n> sets the number of edits per scenario,
// --csv switches to comma-separated output, --seed <n> makes the random
// scenarios reproducible.
const argv = minimist(process.argv.slice(2))
const { ops, csv, seed: SEED } = argv
const NUMBER_OF_OPS = ops || 1000
const CSV_OUTPUT = csv || false
if (SEED) {
  seed(SEED)
}
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const examplesDir = path.join(
  __dirname,
  '../../test/unit/src/LezerLatex/examples'
)
// Run the scenarios against every .tex example, in sorted file-name order
const texFiles = fs
  .readdirSync(examplesDir)
  .sort()
  .filter(entry => entry.endsWith('.tex'))
texFiles.forEach(file => {
  // The test name is the part of the file name before the first dot
  const [name] = file.match(/^[^.]*/)
  const content = fs.readFileSync(path.join(examplesDir, file), 'utf8')
  runPerformanceTests(name, content)
})
/**
 * Runs every edit scenario against one document and prints the average
 * re-parse time of each, as a CSV row or as an object depending on CSV_OUTPUT.
 *
 * The typing scenarios insert the first NUMBER_OF_OPS characters of the
 * document one at a time at the end, the beginning and the middle; the others
 * perform NUMBER_OF_OPS random deletions, deletions from the middle and random
 * insertions.
 *
 * @param name - label for the document
 * @param content - document text
 */
function runPerformanceTests(name, content) {
  const typedText = content.substring(0, NUMBER_OF_OPS)
  const midpoint = Math.floor(content.length / 2)

  // Scenario order is kept fixed so that a seeded run consumes the random
  // sequence in the same order every time.
  const insertEnd = writeTextAt(content, content.length, typedText)
  const insertBeginning = writeTextAt(content, 0, typedText)
  const insertMiddle = writeTextAt(content, midpoint, typedText)
  const randomDelete = randomDeletions(content, NUMBER_OF_OPS)
  const middleDelete = deletionsFromMiddle(content, NUMBER_OF_OPS)
  const randomInsert = randomInsertions(content, NUMBER_OF_OPS)

  if (!CSV_OUTPUT) {
    console.log({
      name,
      insertAtEnd: insertEnd.average,
      insertAtBeginning: insertBeginning.average,
      insertAtMiddle: insertMiddle.average,
      randomDelete: randomDelete.average,
      middleDelete: middleDelete.average,
      randomInsert: randomInsert.average,
      docLength: content.length,
    })
    return
  }

  const row = [
    name,
    insertBeginning.average,
    insertMiddle.average,
    insertEnd.average,
    randomInsert.average,
    randomDelete.average,
    middleDelete.average,
    content.length,
  ]
  console.log(row.join(','))
}
/**
 * Applies a series of edits to a document and measures how long the parser
 * takes to re-parse after each one, reusing tree fragments between parses.
 *
 * @param document - initial document text
 * @param changes - number of edits to apply
 * @param changeFn - called with `(currentDoc, index)`; returns the edited
 *   `text` and the changed `range` ({ fromA, toA, fromB, toB })
 * @returns `{ total, average, ops, fragments }`: total and per-edit parse time
 *   in milliseconds, the number of edits, and the final fragment count
 */
function timedChanges(document, changes, changeFn) {
  // Do a fresh (untimed) parse to get TreeFragments
  let fragments = TreeFragment.addTree(parser.parse(document))
  let currentDoc = document
  let elapsed = 0

  for (let step = 0; step < changes; step += 1) {
    const { text, range } = changeFn(currentDoc, step)
    currentDoc = text

    // Do a timed parse
    const before = performance.now()
    fragments = TreeFragment.applyChanges(fragments, [range])
    const reparsed = parser.parse(currentDoc, fragments)
    fragments = TreeFragment.addTree(reparsed, fragments)
    const after = performance.now()

    elapsed += after - before
  }

  return {
    total: elapsed,
    average: elapsed / changes,
    ops: changes,
    fragments: fragments.length,
  }
}
/**
 * Types `text` into the document one character at a time starting at
 * `position`, re-parsing after every character insertion.
 *
 * @param document - initial document text
 * @param position - offset at which the first character is inserted
 * @param text - characters to insert
 * @returns the timing summary from timedChanges
 */
function writeTextAt(document, position, text) {
  const typeNextChar = (currentDoc, index) =>
    insertAt(currentDoc, position + index, text[index])
  return timedChanges(document, text.length, typeNextChar)
}
/**
 * Inserts the character 'a' at `num` randomly chosen offsets, re-parsing after
 * each insertion.
 *
 * @param document - initial document text
 * @param num - number of insertions
 * @returns the timing summary from timedChanges
 */
function randomInsertions(document, num) {
  const insertSomewhere = currentDoc => {
    const offset = Math.floor(random() * currentDoc.length)
    return insertAt(currentDoc, offset, 'a')
  }
  return timedChanges(document, num, insertSomewhere)
}
/**
 * Deletes one character at `num` randomly chosen offsets, re-parsing after
 * each deletion.
 *
 * @param document - initial document text
 * @param num - number of deletions
 * @returns the timing summary from timedChanges
 */
function randomDeletions(document, num) {
  const deleteSomewhere = currentDoc => {
    const offset = Math.floor(random() * currentDoc.length)
    return deleteAt(currentDoc, offset, 1)
  }
  return timedChanges(document, num, deleteSomewhere)
}
/**
 * Deletes single characters starting at the middle of the document and moving
 * one offset towards the start with each step, re-parsing after every
 * deletion.
 *
 * @param document - initial document text
 * @param num - requested number of deletions; capped at one less than the
 *   midpoint offset
 * @returns the timing summary from timedChanges
 */
function deletionsFromMiddle(document, num) {
  const midpoint = Math.floor(document.length / 2)
  const count = Math.min(num, midpoint - 1)
  const deleteBackwards = (currentDoc, index) =>
    deleteAt(currentDoc, midpoint - index, 1)
  return timedChanges(document, count, deleteBackwards)
}
/**
 * Inserts `text` into `document` at `position`.
 *
 * @param document - original text
 * @param position - offset at which to insert
 * @param text - string to insert
 * @returns `{ text, range }`: the edited text and the changed range, where
 *   fromA/toA describe the (empty) span in the old text and fromB/toB the
 *   inserted span in the new text
 */
function insertAt(document, position, text) {
  const range = {
    fromA: position,
    toA: position,
    fromB: position,
    toB: position + text.length,
  }
  const head = document.substring(0, position)
  const tail = document.substring(position)
  return { text: `${head}${text}${tail}`, range }
}
/**
 * Removes `length` characters from `document` starting at `position`.
 *
 * @param document - original text
 * @param position - offset of the first character to remove
 * @param length - number of characters to remove (default 1)
 * @returns `{ text, range }`: the edited text and the changed range, where
 *   fromA/toA describe the removed span in the old text and fromB/toB the
 *   (empty) span in the new text
 */
function deleteAt(document, position, length = 1) {
  const removedEnd = position + length
  const range = {
    fromA: position,
    toA: removedEnd,
    fromB: position,
    toB: position,
  }
  const head = document.substring(0, position)
  const tail = document.substring(removedEnd)
  return { text: `${head}${tail}`, range }
}