first commit

This commit is contained in:
2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
# Base image: the Emscripten SDK, pinned to the 3.1.67 tag.
FROM emscripten/emsdk:3.1.67
# Install the autotools, libtool and cmake packages used as Hunspell's build
# dependencies on top of the SDK image.
RUN apt-get update && apt-get install -y \
autoconf \
automake \
autopoint \
build-essential \
libtool \
cmake

View File

@@ -0,0 +1,175 @@
/* eslint-disable no-dupe-class-members */
import { v4 as uuid } from 'uuid'
import getMeta from '@/utils/meta'
import { debugConsole } from '@/utils/debugging'
import { captureException } from '@/infrastructure/error-reporter'
/** Request to check a batch of words. */
type SpellMessage = {
  type: 'spell'
  words: string[]
}

/** Request for replacement suggestions for a single word. */
type SuggestMessage = {
  type: 'suggest'
  word: string
}

/** Request to add a word to the worker's dictionary. */
type AddWordMessage = {
  type: 'add_word'
  word: string
}

/** Request to remove a word from the worker's dictionary. */
type RemoveWordMessage = {
  type: 'remove_word'
  word: string
}

/** Request to release the worker's spell checker. */
type DestroyMessage = {
  type: 'destroy'
}

/**
 * Any request that can be posted to the worker. The optional `id` is what
 * pairs a reply with the callback registered for the request.
 */
type Message = { id?: string } & (
  | SpellMessage
  | SuggestMessage
  | AddWordMessage
  | RemoveWordMessage
  | DestroyMessage
)

/** Reply payload of a request that produces no data. */
type EmptyResult = Record<string, never>

/** Reply payload of a request that failed. */
type ErrorResult = {
  error: true
}

/** Reply payload of a `spell` request: positions of the misspelled words. */
type SpellResult = {
  misspellings: { index: number }[]
}

/** Reply payload of a `suggest` request. */
type SuggestResult = {
  suggestions: string[]
}

/** A callback receiving either the given success payload or an error. */
type CallbackFor<TResult> = (value: TResult | ErrorResult) => void

/** Every callback shape that can be registered for a pending request. */
type ResultCallback =
  | CallbackFor<SpellResult>
  | CallbackFor<SuggestResult>
  | CallbackFor<EmptyResult>
/**
 * Main-thread client for the Hunspell web worker.
 *
 * Requests are posted to the worker with a generated id; the worker's reply
 * carrying the same id is routed to the callback registered for the request.
 * Requests made before the worker announces that it is listening are queued
 * and posted once it does.
 */
export class HunspellManager {
  /** Absolute URL that asset paths are resolved against. */
  baseAssetPath: string
  /** Value of the `ol-dictionariesRoot` meta entry, forwarded to the worker. */
  dictionariesRoot: string
  hunspellWorker!: Worker
  abortController: AbortController | undefined
  /** True once the worker has posted `{ listening: true }`. */
  listening = false
  /** True once the worker has posted `{ loaded: true }`. */
  loaded = false
  /** True once the worker has reported that loading failed. */
  loadingFailed = false
  /** Requests waiting for the worker to start listening. */
  pendingMessages: Message[] = []
  /** Callbacks of in-flight requests, keyed by request id. */
  callbacks: Map<string, ResultCallback> = new Map()

  /**
   * @param language - dictionary language, sent to the worker as `lang`
   * @param learnedWords - extra words, sent to the worker on initialisation
   */
  constructor(
    private readonly language: string,
    private readonly learnedWords: string[]
  ) {
    this.baseAssetPath = new URL(
      getMeta('ol-baseAssetPath'),
      window.location.href
    ).toString()
    this.dictionariesRoot = getMeta('ol-dictionariesRoot')
    this.hunspellWorker = new Worker(
      /* webpackChunkName: "hunspell-worker" */
      new URL('./hunspell.worker.ts', import.meta.url),
      { type: 'module' }
    )
    this.hunspellWorker.addEventListener('message', this.receive.bind(this))
  }

  /**
   * Asks the worker to release its spell checker and terminates the worker
   * once it has replied.
   */
  destroy() {
    if (this.loadingFailed) {
      // send() ignores every message after a loading failure, so the callback
      // below would never run and the worker would never be terminated
      this.callbacks.clear()
      this.hunspellWorker.terminate()
      return
    }
    this.send({ type: 'destroy' }, () => {
      this.hunspellWorker.terminate()
    })
  }

  send(
    message: AddWordMessage,
    callback: (value: EmptyResult | ErrorResult) => void
  ): void

  send(
    message: RemoveWordMessage,
    callback: (value: EmptyResult | ErrorResult) => void
  ): void

  send(
    message: DestroyMessage,
    callback: (value: EmptyResult | ErrorResult) => void
  ): void

  send(
    message: SuggestMessage,
    callback: (value: SuggestResult | ErrorResult) => void
  ): void

  send(
    message: SpellMessage,
    callback: (value: SpellResult | ErrorResult) => void
  ): void

  /**
   * Posts a request to the worker, or queues it until the worker is
   * listening. The request is silently dropped (and the callback never
   * invoked) once loading has failed.
   *
   * @param message - the request; its `id` is overwritten with a fresh uuid
   * @param callback - invoked with the worker's reply for this request
   */
  send(message: Message, callback: ResultCallback): void {
    if (this.loadingFailed) {
      return // ignore the message
    }
    if (callback) {
      message.id = uuid()
      this.callbacks.set(message.id, callback)
    }
    if (this.listening) {
      this.hunspellWorker.postMessage(message)
    } else {
      this.pendingMessages.push(message)
    }
  }

  /**
   * Handles a message from the worker: either the reply to a request (it has
   * an `id`) or one of the lifecycle notifications `listening`, `loaded` and
   * `loadingFailed`.
   */
  receive(event: MessageEvent) {
    debugConsole.log(event.data)
    const { id, ...rest } = event.data
    if (id) {
      const callback = this.callbacks.get(id)
      if (callback) {
        // each reply is delivered at most once
        this.callbacks.delete(id)
        callback(rest)
      }
    } else if (rest.listening) {
      this.listening = true
      // the init message must reach the worker before any queued request
      this.hunspellWorker.postMessage({
        type: 'init',
        lang: this.language,
        learnedWords: this.learnedWords,
        baseAssetPath: this.baseAssetPath,
        dictionariesRoot: this.dictionariesRoot,
      })
      // post every queued request, and only then empty the queue: clearing it
      // inside the loop would end the iteration after the first message
      for (const message of this.pendingMessages) {
        this.hunspellWorker.postMessage(message)
      }
      this.pendingMessages.length = 0
    } else if (rest.loaded) {
      this.loaded = true
    } else if (rest.loadingFailed) {
      captureException(
        new Error('Spell check loading failed', {
          cause: rest.loadingFailed,
        }),
        {
          tags: { ol_spell_check_language: this.language },
        }
      )
      this.loadingFailed = true
      this.pendingMessages.length = 0
    }
  }
}

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -e

# Tag of the locally built image, and the directory the script is run from
# (it must contain the Dockerfile, compile.sh and the wasm output directory).
image_tag="overleaf/emsdk"
host_dir="$(pwd)"

# Step 1: build an Emscripten SDK image that also has Hunspell's build
# dependencies installed.
docker build --pull --tag "${image_tag}" .

# Step 2: run compile.sh inside that image; it compiles Hunspell to WASM and
# leaves the output files in the mounted wasm directory.
docker run --rm \
  --workdir /opt \
  --volume "${host_dir}/wasm":/wasm \
  --volume "${host_dir}/compile.sh":/opt/compile.sh:ro \
  "${image_tag}" \
  bash compile.sh

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Downloads a pinned Hunspell source snapshot, builds it as a static library
# through the Emscripten wrappers, links it into hunspell.mjs and copies
# hunspell.mjs and hunspell.wasm to /wasm.
# Stop at the first failing command.
set -e
# Hunspell commit that is built.
COMMIT="e994dceb97fb695bca6bfe5ad5665525426bf01f"
# Download the source archive for that commit from GitHub and unpack it into
# the current directory.
curl -L "https://github.com/hunspell/hunspell/archive/${COMMIT}.tar.gz" | tar xvz
cd "hunspell-${COMMIT}"
# Generate the build system, then configure and build a static-only library
# via emconfigure/emmake.
autoreconf -fiv
emconfigure ./configure --disable-shared --enable-static
emmake make
# Link the static library into hunspell.mjs. The exported function list is the
# set of Hunspell C entry points (plus malloc/free) that the worker wraps; the
# exported runtime methods are the helpers it reads from the module object.
em++ \
-s EXPORTED_FUNCTIONS="['_Hunspell_create', '_Hunspell_destroy', '_Hunspell_spell', '_Hunspell_suggest', '_Hunspell_free_list', '_Hunspell_add_dic', '_Hunspell_add', '_Hunspell_remove', '_free', '_malloc', 'FS']" \
-s EXPORTED_RUNTIME_METHODS="['ccall', 'cwrap', 'getValue', 'stringToNewUTF8', 'UTF8ToString', 'MEMFS']" \
-s ENVIRONMENT=worker \
-s STACK_SIZE=5MB \
-s ALLOW_MEMORY_GROWTH \
-O2 \
-g2 \
src/hunspell/.libs/libhunspell-1.7.a \
-o hunspell.mjs
# Copy both output files to /wasm (build.sh mounts the host's wasm directory
# there).
cp hunspell.{mjs,wasm} /wasm/

View File

@@ -0,0 +1,247 @@
import Hunspell from './wasm/hunspell'
import hunspellWasmPath from './wasm/hunspell.wasm'
import { buildAdditionalDictionary } from './wordlists/dictionary-additions'
/** Operations offered by a loaded Hunspell instance. */
interface SpellChecker {
  /** Returns the positions (within `words`) of the words judged misspelled. */
  spell(words: string[]): { index: number }[]
  /** Returns replacement suggestions for `word`. */
  suggest(word: string): string[]
  /** Adds `word` to the dictionary; throws if it could not be added. */
  addWord(word: string): void
  /** Removes `word` from the dictionary; throws if it could not be removed. */
  removeWord(word: string): void
  /** Releases the resources held by the instance. */
  destroy(): void
}
/**
 * Loads the Hunspell WebAssembly module, fetches the `.dic`/`.aff` files for
 * `lang`, writes them into an in-memory filesystem and creates a Hunspell
 * instance from them. An extra dictionary built from `learnedWords` (see
 * `buildAdditionalDictionary`) is added on top.
 *
 * @param lang - dictionary name; `<lang>.dic` and `<lang>.aff` are fetched
 * @param learnedWords - words passed to `buildAdditionalDictionary`
 * @param baseAssetPath - base URL the wasm file and dictionaries root resolve against
 * @param dictionariesRoot - URL of the dictionaries directory, relative to `baseAssetPath`
 * @returns a spell checker bound to the created Hunspell instance
 * @throws if either dictionary file cannot be fetched successfully
 */
const createSpellChecker = async ({
  lang,
  learnedWords,
  baseAssetPath,
  dictionariesRoot,
}: {
  lang: string
  learnedWords: string[]
  baseAssetPath: string
  dictionariesRoot: string
}) => {
  const fileLocations: Record<string, string> = {
    'hunspell.wasm': new URL(hunspellWasmPath, baseAssetPath).toString(),
  }

  const hunspell = await Hunspell({
    locateFile(file: string) {
      return fileLocations[file]
    },
  })

  const {
    cwrap,
    FS,
    MEMFS,
    stringToNewUTF8,
    _malloc,
    _free,
    getValue,
    UTF8ToString,
  } = hunspell

  // https://github.com/hunspell/hunspell/blob/master/src/hunspell/hunspell.h
  // https://github.com/kwonoj/hunspell-asm/blob/master/src/wrapHunspellInterface.ts
  const create = cwrap('Hunspell_create', 'number', ['number', 'number'])
  // Hunspell_destroy takes the handle as its only argument
  const destroy = cwrap('Hunspell_destroy', 'number', ['number'])
  const spell = cwrap('Hunspell_spell', 'number', ['number', 'number'])
  const suggest = cwrap('Hunspell_suggest', 'number', [
    'number',
    'number',
    'number',
  ])
  const addDic = cwrap('Hunspell_add_dic', 'number', ['number', 'number'])
  const addWord = cwrap('Hunspell_add', 'number', ['number', 'number'])
  const removeWord = cwrap('Hunspell_remove', 'number', ['number', 'number'])
  const freeList = cwrap('Hunspell_free_list', 'number', [
    'number',
    'number',
    'number',
  ])

  FS.mkdir('/dictionaries')

  const dictionariesRootURL = new URL(dictionariesRoot, baseAssetPath)

  // Fetches one dictionary file, rejecting on an HTTP error status so that an
  // error page is never written to the filesystem as if it were a dictionary.
  const fetchDictionaryFile = async (extension: 'dic' | 'aff') => {
    const response = await fetch(
      new URL(`./${lang}.${extension}`, dictionariesRootURL)
    )
    if (!response.ok) {
      throw new Error(
        `Failed to fetch ${lang}.${extension}: HTTP ${response.status}`
      )
    }
    return response.arrayBuffer()
  }

  const [dic, aff] = await Promise.all([
    fetchDictionaryFile('dic'),
    fetchDictionaryFile('aff'),
  ])

  FS.mount(MEMFS, {}, '/dictionaries')
  FS.writeFile('/dictionaries/index.dic', new Uint8Array(dic))
  FS.writeFile('/dictionaries/index.aff', new Uint8Array(aff))

  const dicPtr = stringToNewUTF8('/dictionaries/index.dic')
  const affPtr = stringToNewUTF8('/dictionaries/index.aff')
  const spellPtr = create(affPtr, dicPtr)

  FS.writeFile(
    '/dictionaries/extra.dic',
    await buildAdditionalDictionary(lang, learnedWords)
  )
  const extraDicPtr = stringToNewUTF8('/dictionaries/extra.dic')
  addDic(spellPtr, extraDicPtr)
  _free(extraDicPtr)

  const spellChecker: SpellChecker = {
    spell(words) {
      const misspellings: { index: number }[] = []

      for (const [index, word] of words.entries()) {
        const wordPtr = stringToNewUTF8(word)
        const spellResult = spell(spellPtr, wordPtr)
        _free(wordPtr)

        // a result of 0 is treated as "misspelled"
        if (spellResult === 0) {
          misspellings.push({ index })
        }
      }

      return misspellings
    },
    suggest(word) {
      const suggestions: string[] = []

      // out-parameter receiving the pointer to the suggestion list; 4 bytes
      // is the size of one pointer (assumes a 32-bit wasm build)
      const suggestionListPtr = _malloc(4)
      const wordPtr = stringToNewUTF8(word)
      const suggestionCount = suggest(spellPtr, suggestionListPtr, wordPtr)
      _free(wordPtr)

      const suggestionListValuePtr = getValue(suggestionListPtr, '*')

      for (let i = 0; i < suggestionCount; i++) {
        const suggestion = UTF8ToString(
          getValue(suggestionListValuePtr + i * 4, '*')
        )
        suggestions.push(suggestion)
      }

      freeList(spellPtr, suggestionListPtr, suggestionCount)
      _free(suggestionListPtr)

      return suggestions
    },
    addWord(word) {
      const wordPtr = stringToNewUTF8(word)
      const result = addWord(spellPtr, wordPtr)
      _free(wordPtr)

      if (result !== 0) {
        throw new Error('The word could not be added to the dictionary')
      }
    },
    removeWord(word) {
      const wordPtr = stringToNewUTF8(word)
      const result = removeWord(spellPtr, wordPtr)
      _free(wordPtr)

      if (result !== 0) {
        throw new Error('The word could not be removed from the dictionary')
      }
    },
    destroy() {
      // Hunspell_destroy releases the handle itself, so the handle must not
      // be passed to _free afterwards (that would be a double free)
      destroy(spellPtr)
      _free(dicPtr)
      _free(affPtr)
    },
  }

  return spellChecker
}
// Set by the 'init' message; every other message waits on it.
let spellCheckerPromise: Promise<SpellChecker>

/**
 * Waits for the spell checker, runs `action` against it and posts the
 * action's result together with the request id. Any failure is logged and
 * reported to the sender as `{ id, error: true }`.
 */
const replyWith = async (
  id: string | undefined,
  action: (spellChecker: SpellChecker) => Record<string, unknown>
) => {
  try {
    const spellChecker = await spellCheckerPromise
    self.postMessage({ id, ...action(spellChecker) })
  } catch (error) {
    console.error(error)
    self.postMessage({ id, error: true })
  }
}

self.addEventListener('message', async event => {
  const request = event.data

  switch (request.type) {
    case 'init':
      // creation failures are reported as a lifecycle message, not as a reply
      try {
        spellCheckerPromise = createSpellChecker(request)
        await spellCheckerPromise
        self.postMessage({ loaded: true })
      } catch (error) {
        console.error(error)
        self.postMessage({ loadingFailed: error })
      }
      break

    case 'spell':
      await replyWith(request.id, checker => ({
        misspellings: checker.spell(request.words),
      }))
      break

    case 'suggest':
      await replyWith(request.id, checker => ({
        suggestions: checker.suggest(request.word),
      }))
      break

    case 'add_word':
      await replyWith(request.id, checker => {
        checker.addWord(request.word)
        return {}
      })
      break

    case 'remove_word':
      await replyWith(request.id, checker => {
        checker.removeWord(request.word)
        return {}
      })
      break

    case 'destroy':
      await replyWith(request.id, checker => {
        checker.destroy()
        return {}
      })
      break
  }
})

// tell the main thread that the listener above is installed
self.postMessage({ listening: true })

View File

@@ -0,0 +1,9 @@
# Hunspell
The files in this directory are:
* `hunspell.wasm`: [Hunspell](https://github.com/hunspell/hunspell) compiled to WebAssembly using Emscripten, via the [build.sh](../build.sh) script.
* `hunspell.mjs`: a JavaScript wrapper for the WebAssembly module, generated by Emscripten.
* `hunspell.d.ts`: manually-created types for the exports from the JavaScript module.
Note: To speed up compilation on ARM architectures (e.g. Apple M1), add `-arm64` to the Docker image tag in `Dockerfile` (for example, `emscripten/emsdk:3.1.67-arm64`).

View File

@@ -0,0 +1,73 @@
/* eslint no-dupe-class-members: 0 */
/**
 * Manually written typing of the module object produced by the
 * Emscripten-generated `hunspell.mjs` wrapper. Only the members used by the
 * worker are declared.
 */
declare class Hunspell {
  /** Wraps `Hunspell_create`; returns the handle of the new instance. */
  cwrap(
    method: 'Hunspell_create',
    output: string,
    input: string[]
  ): (affPtr: number, dicPtr: number) => number

  /** Wraps `Hunspell_destroy`. */
  cwrap(
    method: 'Hunspell_destroy',
    output: string,
    input: string[]
  ): (spellPtr: number) => number

  /** Wraps `Hunspell_spell`. */
  cwrap(
    method: 'Hunspell_spell',
    output: string,
    input: string[]
  ): (spellPtr: number, wordPtr: number) => number

  /** Wraps `Hunspell_suggest`; returns the number of suggestions written. */
  cwrap(
    method: 'Hunspell_suggest',
    output: string,
    input: string[]
  ): (spellPtr: number, suggestionListPtr: number, wordPtr: number) => number

  /** Wraps `Hunspell_add_dic`. */
  cwrap(
    method: 'Hunspell_add_dic',
    output: string,
    input: string[]
  ): (spellPtr: number, wordPtr: number) => number

  /** Wraps `Hunspell_add`. */
  cwrap(
    method: 'Hunspell_add',
    output: string,
    input: string[]
  ): (spellPtr: number, wordPtr: number) => number

  /** Wraps `Hunspell_remove`. */
  cwrap(
    method: 'Hunspell_remove',
    output: string,
    input: string[]
  ): (spellPtr: number, wordPtr: number) => number

  /** Wraps `Hunspell_free_list`. */
  cwrap(
    method: 'Hunspell_free_list',
    output: string,
    input: string[]
  ): (spellPtr: number, suggestionListPtr: number, n: number) => number

  /** Copies a string into newly allocated memory and returns its pointer. */
  stringToNewUTF8(input: string): number
  /** Reads the string stored at the given pointer. */
  UTF8ToString(input: number): string
  _malloc(length: number): number
  _free(ptr: number): void
  /** Reads a value of the given type from the given pointer. */
  getValue(ptr: number, type: string): number

  /** The subset of the Emscripten filesystem API used by the worker. */
  FS: {
    mkdir(path: string): void
    mount(type: any, opts: Record<string, any>, dir: string): void
    // explicit return type: an omitted one is an implicit `any`
    writeFile(
      path: string,
      data: string | ArrayBufferView,
      opts?: { flags?: string }
    ): void
  }

  /** Filesystem type passed to `FS.mount`. */
  MEMFS: any
}
/**
 * Default export of the Emscripten-generated module: an async factory that
 * resolves to the initialised module object.
 *
 * Declared with a type annotation rather than an initializer: initializers
 * are not allowed in ambient (`declare`) contexts, and the `Hunspell` class
 * declares no constructor parameters.
 *
 * @param options - module options (the worker passes `locateFile`)
 */
declare const factory: (options?: Record<string, any>) => Promise<Hunspell>
export default factory

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,18 @@
// Languages for which a bundled `<lang>.txt` word list exists.
const dictionaryAdditions = new Set(['en_US'])

/**
 * Builds the contents of an additional dictionary file: a word-count line
 * followed by one word per line, encoded as UTF-8.
 *
 * @param lang - language whose bundled word list (if any) is appended
 * @param learnedWords - words placed first in the dictionary; not modified
 * @returns the encoded dictionary contents
 */
export const buildAdditionalDictionary = async (
  lang: string,
  learnedWords: string[]
) => {
  let entries = learnedWords.slice()

  if (dictionaryAdditions.has(lang)) {
    const wordListModule = await import(`./${lang}.txt`)
    const bundledWords = (wordListModule.default as string)
      .split('\n')
      .filter(line => line !== '')
    entries = entries.concat(bundledWords)
  }

  // the first line contains the approximate word count
  const lines = [String(entries.length), ...entries]

  return new TextEncoder().encode(lines.join('\n'))
}

View File

@@ -0,0 +1,535 @@
Abaqus
abelian
ACF
ACS
AdaBoost
AFM
AGN
AIC
AlexNet
AMR
anharmonicity
ANNs
ANOVA
ANSYS
apriori
arcsec
ARIMA
ARMA
arXiv
ASIC
ASR
asymptotics
AUC
autoencoder
autoencoders
AWGN
axion
backend
backhaul
backpropagation
Balmer
beamformer
beamforming
beamline
BER
BFGS
BiLSTM
binarization
binarized
Biot
bistability
BLEU
BMI
Bogoliubov
Bonferroni
Boussinesq
Cartan
cartesian
casted
CDF
CDM
centerline
CFD
CFL
Chebyshev
Cholesky
CIFAR
CMB
CNC
CNF
Colab
collisionless
comorbidities
compactification
COMSOL
confounders
CoV
CRF
crosslinking
CSI
CubeSat
CUDA
cumulant
cumulants
CVD
DAC
DAQ
datacenter
datasheet
DBSCAN
DCT
debiasing
Debye
deformability
delocalization
denoise
denoised
denoising
DenseNet
densification
dephasing
depthwise
detrended
deviatoric
DevOps
DFS
DFT
dichalcogenides
dimerization
discretizations
discriminability
dispatchable
distinguishability
distributionally
DNN
DNNs
DOF
DOFs
DQN
DRL
Drude
DSC
DTU
dynamicity
ECM
eco
EDA
EfficientNet
eigen
eigenmode
eigenmodes
EKF
electrolyzer
EMG
endogeneity
ensembling
equivariant
ERP
ESS
Eulerian
exceedance
explainability
Fabry
FCN
FDM
FEA
feedforward
FFT
finetune
finetuned
finetuning
Floquet
fluorophore
fluorophores
Fock
forcings
FOV
FPGA
FPGAs
freestream
Frobenius
FSM
FTIR
functionalization
functionalized
functionals
FWHM
GaAs
Galerkin
GANs
gapless
GARCH
gaussian
Gaussianity
GCN
GDPR
GHG
Gini
GIS
GLM
GMM
GNN
GNNs
GNSS
GPP
GPT
grayscale
GRU
Gurobi
hadronization
Hankel
Hartree
HCI
HCl
heatmap
heatmaps
hemodynamic
heteroskedasticity
HMD
holonomic
homodyne
homophily
Hopfield
HPC
HVAC
hypergraph
hypergraphs
hyperparameter
hyperspectral
hypersurface
hypersurfaces
hysteretic
ICP
ICT
IDF
iff
IID
ILP
ImageJ
ImageNet
IMU
incentivizes
incentivizing
injectivity
inpainting
interferogram
interparticle
intertemporal
intraday
invasively
IoU
IQR
Ising
iso
Jaccard
Jupyter
JWST
Kaggle
Kalman
Keras
ket
keypoint
keypoints
Kinect
KNN
kpc
KPI
KPIs
Krylov
Kubernetes
Kutta
LabVIEW
Langevin
Larmor
LCA
LDA
Lennard
Levenberg
LHC
LHS
LIGO
Likert
Lindblad
linewidth
LLM
LLMs
logit
logits
lognormal
LoRa
LQR
LSTM
LSTMs
LTE
LTI
macroscale
Majorana
makespan
MAPE
MapReduce
Marquardt
MCMC
MCU
MDP
measurability
medoids
mesoscale
metaheuristic
metaheuristics
metamodel
meV
microenvironment
microfluidic
microfluidics
microgrid
microgrids
microscale
microservice
microservices
MILP
MIMO
MIP
misclassifications
missingness
MLE
MLP
MLPs
mmWave
MNIST
MobileNet
monodisperse
MOSFET
MOSFETs
MPC
MPI
MQTT
MSE
multiclass
multiobjective
multiphysics
multipoles
multiscale
mV
NaOH
NER
neuro
neuromorphic
NIR
NIST
NLP
NN
NNs
Noether
nonconvex
nondegenerate
nonlinearities
nonlinearly
nonlocal
nonstationary
Nusselt
ODEs
odometry
OFDM
OLS
omics
OpenCV
OpenFOAM
OpenMP
OpenStreetMap
operationalization
overdamped
overfit
overfitting
overpotential
PageRank
parameterization
parameterizations
parameterizing
paraxial
passivation
PCA
PCC
PCR
PDEs
PDMS
Peltier
perceptron
perceptrons
perturbatively
photovoltaics
piezo
PIV
PLA
planform
PMMA
PMT
polarimetric
polarizers
polytropic
posedness
poset
postprocessing
PPO
Prandtl
preconditioner
premia
prespecified
pretrained
pretraining
prosumers
proto
PSD
PSNR
PSO
PV
PWM
QFT
quadcopter
quadrotor
quintiles
Raman
RANSAC
RBF
RDF
recyclability
regularizer
ReLU
reparameterization
ResNet
reweighted
reweighting
RHS
Ricci
Riesz
RL
RLC
RMSE
RNN
RNNs
RoBERTa
ROS
RPC
RSSI
RTT
Runge
SaaS
SAR
scalings
scatterplot
Schwarz
scikit
SCM
Scopus
SDE
SDG
SDN
SDP
SDR
SDSS
SED
serverless
setpoint
setpoints
SFR
sharding
sigmoidal
Simulink
SiO
SLA
SMA
SMC
Smirnov
SMT
SNE
Sobel
Sobolev
sociodemographic
softmax
SolidWorks
soliton
solitons
SOTA
spacelike
spanwise
sparsification
SPI
spintronic
spintronics
SSIM
Stackelberg
Stata
STFT
STL
STM
stochasticity
streamwise
Strouhal
subcarrier
subcarriers
subgraph
subgraphs
subgrid
sublinear
submodular
subproblems
superlinear
supremum
SVD
SVM
SVMs
TEM
TeV
thermalization
thermo
thermomechanical
thermophysical
thresholding
Tikhonov
timelike
timeseries
timestep
Toeplitz
TOF
TP
tradeoff
tradeoffs
transcriptomic
translationally
tri
Uber
UMAP
UML
undeformed
underactuated
underexplored
underfitting
undoped
UNet
unmodeled
unpolarized
VAE
VAEs
VGG
virial
viscoelasticity
ViT
Voigt
Voronoi
voxel
voxels
Waals
walkthrough
Wasserstein
wavefunction
wavefunctions
wavenumber
wavenumbers
wettability
Wilcoxon
XAI
XGBoost
Xilinx
XRD
Yukawa
Zehnder