first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,233 @@
import { isMainFile } from './editor-files'
import getMeta from '../../../utils/meta'
import { deleteJSON, postJSON } from '../../../infrastructure/fetch-json'
import { debounce } from 'lodash'
import { EDITOR_SESSION_ID, trackPdfDownload } from './metrics'
import { enablePdfCaching } from './pdf-caching-flags'
import { debugConsole } from '@/utils/debugging'
import { signalWithTimeout } from '@/utils/abort-signal'
const AUTO_COMPILE_MAX_WAIT = 5000
// We add a 2 second debounce to sending user changes to the server when the
// user isn't collaborating with anyone. This value needs to be higher than
// SINGLE_USER_FLUSH_DELAY, and allow for client-to-server latency; otherwise
// we compile before the op reaches the server, and then again on its ack.
const AUTO_COMPILE_DEBOUNCE = 2500
// If there is a pending op, wait for it to be saved before compiling
const PENDING_OP_MAX_WAIT = 10000
const searchParams = new URLSearchParams(window.location.search)
export default class DocumentCompiler {
constructor({
compilingRef,
projectId,
setChangedAt,
setCompiling,
setData,
setFirstRenderDone,
setDeliveryLatencies,
setError,
cleanupCompileResult,
signal,
openDocs,
}) {
this.compilingRef = compilingRef
this.projectId = projectId
this.setChangedAt = setChangedAt
this.setCompiling = setCompiling
this.setData = setData
this.setFirstRenderDone = setFirstRenderDone
this.setDeliveryLatencies = setDeliveryLatencies
this.setError = setError
this.cleanupCompileResult = cleanupCompileResult
this.signal = signal
this.openDocs = openDocs
this.projectRootDocId = null
this.clsiServerId = null
this.currentDoc = null
this.error = undefined
this.timer = 0
this.defaultOptions = {
draft: false,
stopOnFirstError: false,
}
this.debouncedAutoCompile = debounce(
() => {
this.compile({ isAutoCompileOnChange: true })
},
AUTO_COMPILE_DEBOUNCE,
{
maxWait: AUTO_COMPILE_MAX_WAIT,
}
)
}
// The main "compile" function.
// Call this directly to run a compile now, otherwise call debouncedAutoCompile.
async compile(options = {}) {
options = { ...this.defaultOptions, ...options }
if (options.isAutoCompileOnLoad && getMeta('ol-preventCompileOnLoad')) {
return
}
// set "compiling" to true (in the React component's state), and return if it was already true
const wasCompiling = this.compilingRef.current
this.setCompiling(true)
if (wasCompiling) {
if (options.isAutoCompileOnChange) {
this.debouncedAutoCompile()
}
return
}
try {
await this.openDocs.awaitBufferedOps(
signalWithTimeout(this.signal, PENDING_OP_MAX_WAIT)
)
// reset values
this.setChangedAt(0) // TODO: wait for doc:saved?
this.validationIssues = undefined
const params = this.buildCompileParams(options)
const t0 = performance.now()
const rootDocId = this.getRootDocOverrideId()
const body = {
rootDoc_id: rootDocId,
draft: options.draft,
check: 'silent', // NOTE: 'error' and 'validate' are possible, but unused
// use incremental compile for all users but revert to a full compile
// if there was previously a server error
incrementalCompilesEnabled: !this.error,
stopOnFirstError: options.stopOnFirstError,
editorId: EDITOR_SESSION_ID,
}
const data = await postJSON(
`/project/${this.projectId}/compile?${params}`,
{ body, signal: this.signal }
)
const compileTimeClientE2E = Math.ceil(performance.now() - t0)
const { deliveryLatencies, firstRenderDone } = trackPdfDownload(
data,
compileTimeClientE2E,
t0
)
this.setDeliveryLatencies(() => deliveryLatencies)
this.setFirstRenderDone(() => firstRenderDone)
// unset the error before it's set again later, so that components are recreated and events are tracked
this.setError(undefined)
data.options = options
data.rootDocId = rootDocId
if (data.clsiServerId) {
this.clsiServerId = data.clsiServerId
}
this.setData(data)
} catch (error) {
debugConsole.error(error)
this.cleanupCompileResult()
this.setError(error.info?.statusCode === 429 ? 'rate-limited' : 'error')
} finally {
this.setCompiling(false)
}
}
// parse the text of the current doc in the editor
// if it contains "\documentclass" then use this as the root doc
getRootDocOverrideId() {
// only override when not in the root doc itself
if (this.currentDoc && this.currentDoc.doc_id !== this.projectRootDocId) {
const snapshot = this.currentDoc.getSnapshot()
if (snapshot && isMainFile(snapshot)) {
return this.currentDoc.doc_id
}
}
return null
}
// build the query parameters added to post-compile requests
buildPostCompileParams() {
const params = new URLSearchParams()
// the id of the CLSI server that processed the previous compile request
if (this.clsiServerId) {
params.set('clsiserverid', this.clsiServerId)
}
return params
}
// build the query parameters for the compile request
buildCompileParams(options) {
const params = new URLSearchParams()
// note: no clsiserverid query param is set on "compile" requests,
// as this is added in the backend by the web api
// tell the server whether this is an automatic or manual compile request
if (options.isAutoCompileOnLoad || options.isAutoCompileOnChange) {
params.set('auto_compile', 'true')
}
// use the feature flag to enable PDF caching
if (enablePdfCaching) {
params.set('enable_pdf_caching', 'true')
}
// use the feature flag to enable "file line errors"
if (searchParams.get('file_line_errors') === 'true') {
params.set('file_line_errors', 'true')
}
return params
}
// send a request to stop the current compile
stopCompile() {
// NOTE: no stoppingCompile state, as this should happen fairly quickly
// and doesn't matter if it runs twice.
const params = this.buildPostCompileParams()
return postJSON(`/project/${this.projectId}/compile/stop?${params}`, {
signal: this.signal,
})
.catch(error => {
debugConsole.error(error)
this.setError('error')
})
.finally(() => {
this.setCompiling(false)
})
}
// send a request to clear the cache
clearCache() {
const params = this.buildPostCompileParams()
return deleteJSON(`/project/${this.projectId}/output?${params}`, {
signal: this.signal,
}).catch(error => {
debugConsole.error(error)
this.setError('clear-cache')
})
}
setOption(option, value) {
this.defaultOptions[option] = value
}
}
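
A minimal wiring sketch for the class above. The inline callbacks, the `openDocs` stub and the `editorEvents` emitter are hypothetical placeholders; in the real app these come from the PDF preview React context and the editor.

const controller = new AbortController()
const compiler = new DocumentCompiler({
  compilingRef: { current: false },
  projectId: 'project-id', // placeholder
  setChangedAt: () => {},
  setCompiling: () => {},
  setData: data => console.log('compile result', data),
  setFirstRenderDone: () => {},
  setDeliveryLatencies: () => {},
  setError: error => console.warn('compile failed:', error),
  cleanupCompileResult: () => {},
  signal: controller.signal,
  openDocs: { awaitBufferedOps: async () => {} }, // stub: resolves immediately
})
compiler.compile() // manual compile, e.g. the "Recompile" button
// per-keystroke compiles are coalesced; lodash's maxWait still guarantees a
// compile at least every AUTO_COMPILE_MAX_WAIT ms during continuous typing
editorEvents.on('change', () => compiler.debouncedAutoCompile()) // hypothetical emitter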

View File

@@ -0,0 +1,4 @@
const documentClassRe = /^[^%]*\\documentclass/
export const isMainFile = doc =>
doc.split('\n').some(line => documentClassRe.test(line))
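
For illustration, how this guard behaves: the regex runs against each line and only matches a \documentclass that appears before any % comment marker on that line.

isMainFile('\\documentclass{article}\n\\begin{document}') // true
isMainFile('% \\documentclass{article}') // false: commented out
isMainFile('\\section{Introduction}') // false: not a root document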

View File

@@ -0,0 +1,88 @@
import {
CompileOutputFile,
CompileResponseData,
} from '../../../../../types/compile'
import { PdfFileDataList } from '@/features/pdf-preview/util/types'
const topFileTypes = ['bbl', 'gls', 'ind']
// NOTE: Updating this list requires a corresponding change in
// * services/clsi/app/js/OutputFileArchiveManager.js
const ignoreFiles = ['output.fls', 'output.fdb_latexmk']
export function buildFileList(
outputFiles: Map<string, CompileOutputFile>,
{
clsiServerId,
compileGroup,
outputFilesArchive,
fromCache = false,
}: CompileResponseData
): PdfFileDataList {
const files: PdfFileDataList = { top: [], other: [] }
if (outputFiles) {
const params = new URLSearchParams()
if (fromCache) {
params.set('clsiserverid', 'cache')
} else if (clsiServerId) {
params.set('clsiserverid', clsiServerId)
}
if (compileGroup) {
params.set('compileGroup', compileGroup)
}
const queryString = params.toString()
const allFiles = []
// filter out ignored files and set some properties
for (const file of outputFiles.values()) {
if (!ignoreFiles.includes(file.path)) {
file.main = file.path.startsWith('output.')
if (queryString.length) {
file.url += `?${queryString}`
}
allFiles.push(file)
}
}
// sort main files first, then alphabetical
allFiles.sort((a, b) => {
if (a.main && !b.main) {
return -1
}
if (b.main && !a.main) {
return 1
}
return a.path.localeCompare(b.path, undefined, { numeric: true })
})
// group files into "top" and "other"
for (const file of allFiles) {
if (topFileTypes.includes(file.type)) {
files.top.push(file)
} else if (!(file.type === 'pdf' && file.main === true)) {
files.other.push(file)
}
}
const archivableFiles = [...files.top, ...files.other]
if (outputFilesArchive && archivableFiles.length > 0) {
archivableFiles.forEach(file => params.append('files', file.path))
files.archive = {
...outputFilesArchive,
fileCount: archivableFiles.length,
url: `${outputFilesArchive.url}?${params.toString()}`,
}
}
}
return files
}
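
A sketch of the grouping behaviour with hand-rolled stubs (types relaxed with `as any` for brevity; real CompileOutputFile entries come from the compile response and carry more fields):

const outputFiles = new Map([
  ['output.pdf', { path: 'output.pdf', type: 'pdf', url: '/build/b1/output/output.pdf' }],
  ['output.bbl', { path: 'output.bbl', type: 'bbl', url: '/build/b1/output/output.bbl' }],
  ['output.fls', { path: 'output.fls', type: 'fls', url: '/build/b1/output/output.fls' }],
])
const files = buildFileList(outputFiles as any, { clsiServerId: 'clsi-1' } as any)
// each served url gains ?clsiserverid=clsi-1
// files.top   -> [output.bbl]  (bbl/gls/ind files are surfaced first)
// files.other -> []  (output.fls is ignored, the main PDF is linked elsewhere)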

View File

@@ -0,0 +1,35 @@
import { PDFJS } from '@/features/pdf-preview/util/pdf-js'
export function buildHighlightElement(highlight, viewer) {
const pageView = viewer.getPageView(highlight.page - 1)
const viewport = pageView.viewport
const height = viewport.viewBox[3]
const rect = viewport.convertToViewportRectangle([
highlight.h, // xMin
height - (highlight.v + highlight.height) + 10, // yMin
highlight.h + highlight.width, // xMax
height - highlight.v + 10, // yMax
])
const [left, top, right, bottom] = PDFJS.Util.normalizeRect(rect)
const element = document.createElement('div')
element.style.left = Math.floor(pageView.div.offsetLeft + left) + 'px'
element.style.top = Math.floor(pageView.div.offsetTop + top) + 'px'
element.style.width = Math.ceil(right - left) + 'px'
element.style.height = Math.ceil(bottom - top) + 'px'
element.style.backgroundColor = 'rgba(255,255,0)'
element.style.position = 'absolute'
element.style.display = 'inline-block'
element.style.scrollMargin = '72px'
element.style.pointerEvents = 'none'
element.style.opacity = '0'
element.style.transition = 'opacity 1s'
viewer.viewer?.append(element)
return element
}
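
A sketch of flashing the highlight after a SyncTeX forward search. The coordinates are PDF points as returned by the sync endpoint, and the timings are illustrative:

const element = buildHighlightElement(
  { page: 3, h: 72, v: 650, width: 200, height: 12 },
  pdfJsWrapper.viewer // the PDFViewer instance from PDFJSWrapper below
)
element.scrollIntoView({ block: 'center' })
// the element starts at opacity 0 with a 1s transition, so raising the
// opacity fades the highlight in; removing the node cleans up afterwards
requestAnimationFrame(() => { element.style.opacity = '0.5' })
setTimeout(() => element.remove(), 2000)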

View File

@@ -0,0 +1,66 @@
import { v4 as uuid } from 'uuid'
import { sendMB } from '../../../infrastructure/event-tracking'
import { trackPdfDownloadEnabled } from './pdf-caching-flags'
import { debugConsole } from '@/utils/debugging'
// VERSION should get incremented when making changes to caching behavior or
// adjusting metrics collection.
const VERSION = 9
// editing session id
export const EDITOR_SESSION_ID = uuid()
const pdfCachingMetrics = {
viewerId: EDITOR_SESSION_ID,
}
export function getPdfCachingMetrics() {
return pdfCachingMetrics
}
export function trackPdfDownload(response, compileTimeClientE2E, t0) {
const { timings, pdfCachingMinChunkSize } = response
const deliveryLatencies = {
compileTimeClientE2E,
compileTimeServerE2E: timings?.compileE2E,
}
// There can be multiple "first" renderings when two PDF viewers are open,
// e.g. two detached PDF tabs, or the detacher tab plus a detached tab.
// Let the pdfCachingMetrics round trip to account for pdf-detach.
let isFirstRender = true
function firstRenderDone({ latencyFetch, latencyRender, pdfCachingMetrics }) {
if (!isFirstRender) return
isFirstRender = false
deliveryLatencies.totalDeliveryTime = Math.ceil(performance.now() - t0)
deliveryLatencies.latencyFetch = latencyFetch
if (latencyRender) {
deliveryLatencies.latencyRender = latencyRender
}
if (trackPdfDownloadEnabled) {
// Submit latency along with compile context.
submitCompileMetrics({
pdfCachingMinChunkSize,
...deliveryLatencies,
...pdfCachingMetrics,
})
}
}
return {
deliveryLatencies,
firstRenderDone,
}
}
function submitCompileMetrics(metrics) {
const leanMetrics = {
version: VERSION,
...metrics,
id: EDITOR_SESSION_ID,
}
debugConsole.log('/event/compile-metrics', JSON.stringify(leanMetrics))
sendMB('compile-metrics-v6', leanMetrics)
}
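
A sketch of the round trip, mirroring how DocumentCompiler.compile() uses these helpers (fetchCompileResponse is a hypothetical stand-in for the postJSON call):

const t0 = performance.now()
const data = await fetchCompileResponse() // hypothetical
const compileTimeClientE2E = Math.ceil(performance.now() - t0)
const { deliveryLatencies, firstRenderDone } = trackPdfDownload(
  data,
  compileTimeClientE2E,
  t0
)
// later, the PDF viewer reports its first successful render
firstRenderDone({
  latencyFetch: 120, // ms, illustrative
  latencyRender: 45, // ms, illustrative
  pdfCachingMetrics: getPdfCachingMetrics(),
})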

View File

@@ -0,0 +1,276 @@
import HumanReadableLogs from '../../../ide/human-readable-logs/HumanReadableLogs'
import BibLogParser from '../../../ide/log-parser/bib-log-parser'
import { enablePdfCaching } from './pdf-caching-flags'
import { debugConsole } from '@/utils/debugging'
import { dirname, findEntityByPath } from '@/features/file-tree/util/path'
import '@/utils/readable-stream-async-iterator-polyfill'
import { EDITOR_SESSION_ID } from '@/features/pdf-preview/util/metrics'
// Warnings that may disappear after a second LaTeX pass
const TRANSIENT_WARNING_REGEX = /^(Reference|Citation).+undefined on input line/
const MAX_LOG_SIZE = 1024 * 1024 // 1MB
const MAX_BIB_LOG_SIZE_PER_FILE = MAX_LOG_SIZE
export function handleOutputFiles(outputFiles, projectId, data) {
const outputFile = outputFiles.get('output.pdf')
if (!outputFile) return null
outputFile.editorId = outputFile.editorId || EDITOR_SESSION_ID
// build the URL for viewing the PDF in the preview UI
const params = new URLSearchParams()
if (data.compileGroup) {
params.set('compileGroup', data.compileGroup)
}
if (data.clsiServerId) {
params.set('clsiserverid', data.clsiServerId)
}
if (enablePdfCaching) {
// Tag traffic that uses the pdf caching logic.
params.set('enable_pdf_caching', 'true')
}
outputFile.pdfUrl = `${buildURL(
outputFile,
data.pdfDownloadDomain
)}?${params}`
if (data.fromCache) {
outputFile.pdfDownloadUrl = outputFile.downloadURL
} else {
// build the URL for downloading the PDF
params.set('popupDownload', 'true') // save PDF download as file
outputFile.pdfDownloadUrl = `/download/project/${projectId}/build/${outputFile.build}/output/output.pdf?${params}`
}
return outputFile
}
let nextEntryId = 1
function generateEntryKey() {
return 'compile-log-entry-' + nextEntryId++
}
export const handleLogFiles = async (outputFiles, data, signal) => {
const result = {
log: null,
logEntries: {
errors: [],
warnings: [],
typesetting: [],
},
}
function accumulateResults(newEntries, type) {
for (const key in result.logEntries) {
if (newEntries[key]) {
for (const entry of newEntries[key]) {
if (type) {
entry.type = type
}
if (entry.file) {
entry.file = normalizeFilePath(entry.file)
}
entry.key = generateEntryKey()
}
result.logEntries[key].push(...newEntries[key])
}
}
}
const logFile = outputFiles.get('output.log')
if (logFile) {
result.log = await fetchFileWithSizeLimit(
buildURL(logFile, data.pdfDownloadDomain),
signal,
MAX_LOG_SIZE
)
try {
let { errors, warnings, typesetting } = HumanReadableLogs.parse(
result.log,
{
ignoreDuplicates: true,
}
)
if (data.status === 'stopped-on-first-error') {
// Hide warnings that could disappear after a second pass
warnings = warnings.filter(warning => !isTransientWarning(warning))
}
accumulateResults({ errors, warnings, typesetting })
} catch (e) {
debugConsole.warn(e) // ignore failure to parse the log file, but log a warning
}
}
const blgFiles = []
for (const [filename, file] of outputFiles) {
if (filename.endsWith('.blg')) {
blgFiles.push(file)
}
}
for (const blgFile of blgFiles) {
const log = await fetchFileWithSizeLimit(
buildURL(blgFile, data.pdfDownloadDomain),
signal,
MAX_BIB_LOG_SIZE_PER_FILE
)
try {
const { errors, warnings } = new BibLogParser(log, {
maxErrors: 100,
}).parse()
accumulateResults({ errors, warnings }, 'BibTeX:')
} catch (e) {
// BibLog parsing errors are ignored
}
}
result.logEntries.all = [
...result.logEntries.errors,
...result.logEntries.warnings,
...result.logEntries.typesetting,
]
return result
}
export function buildLogEntryAnnotations(entries, fileTreeData, rootDocId) {
const rootDocDirname = dirname(fileTreeData, rootDocId)
const logEntryAnnotations = {}
const seenLine = {}
for (const entry of entries) {
if (entry.file) {
entry.file = normalizeFilePath(entry.file, rootDocDirname)
const entity = findEntityByPath(fileTreeData, entry.file)?.entity
if (entity) {
if (!(entity._id in logEntryAnnotations)) {
logEntryAnnotations[entity._id] = []
}
const annotation = {
id: entry.key,
entryIndex: logEntryAnnotations[entity._id].length, // used for maintaining the order of items on the same line
row: entry.line - 1,
type: entry.level === 'error' ? 'error' : 'warning',
text: entry.message,
source: 'compile', // NOTE: this is used in Ace for filtering the annotations
ruleId: entry.ruleId,
command: entry.command,
}
// set firstOnLine for the first non-typesetting annotation on a line
if (entry.level !== 'typesetting') {
if (!seenLine[entry.line]) {
annotation.firstOnLine = true
seenLine[entry.line] = true
}
}
logEntryAnnotations[entity._id].push(annotation)
}
}
}
return logEntryAnnotations
}
export const buildRuleCounts = (entries = []) => {
const counts = {}
for (const entry of entries) {
const key = `${entry.level}_${entry.ruleId}`
counts[key] = counts[key] ? counts[key] + 1 : 1
}
return counts
}
export const buildRuleDeltas = (ruleCounts, previousRuleCounts) => {
const counts = {}
// keys that are defined in the current log entries
for (const [key, value] of Object.entries(ruleCounts)) {
const previousValue = previousRuleCounts[key] ?? 0
counts[`delta_${key}`] = value - previousValue
}
// keys that are no longer defined in the current log entries
for (const [key, value] of Object.entries(previousRuleCounts)) {
if (!(key in ruleCounts)) {
counts[key] = 0
counts[`delta_${key}`] = -value
}
}
return counts
}
function buildURL(file, pdfDownloadDomain) {
if (file.build && pdfDownloadDomain) {
// Downloads from the compiles domain must include a build id.
// The build id is used implicitly for access control.
return `${pdfDownloadDomain}${file.url}`
}
// Go through web instead, which uses mongo for checking project access.
return `${window.origin}${file.url}`
}
function normalizeFilePath(path, rootDocDirname) {
path = path.replace(/\/\//g, '/')
path = path.replace(
/^.*\/compiles\/[0-9a-f]{24}(-[0-9a-f]{24})?\/(\.\/)?/,
''
)
path = path.replace(/^\/compile\//, '')
if (rootDocDirname) {
path = path.replace(/^\.\//, rootDocDirname + '/')
}
return path
}
function isTransientWarning(warning) {
return TRANSIENT_WARNING_REGEX.test(warning.message)
}
async function fetchFileWithSizeLimit(url, signal, maxSize) {
let result = ''
try {
const abortController = new AbortController()
// abort fetching the log file if the main signal is aborted
signal.addEventListener('abort', () => {
abortController.abort()
})
const response = await fetch(url, {
signal: abortController.signal,
})
if (!response.ok) {
throw new Error('Failed to fetch log file')
}
const reader = response.body.pipeThrough(new TextDecoderStream())
for await (const chunk of reader) {
result += chunk
if (result.length > maxSize) {
abortController.abort()
}
}
} catch (e) {
debugConsole.warn(e) // ignore failure to fetch the log file, but log a warning
}
return result
}
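
Putting the pieces together, a minimal sketch of consuming a compile response (treating data.outputFiles as an array is an assumption about the response shape):

const outputFiles = new Map(data.outputFiles.map(file => [file.path, file]))
const pdf = handleOutputFiles(outputFiles, projectId, data)
const { log, logEntries } = await handleLogFiles(outputFiles, data, signal)
console.log(pdf?.pdfUrl)
console.log(`${logEntries.errors.length} errors, ${logEntries.warnings.length} warnings`)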

View File

@@ -0,0 +1,29 @@
import getMeta from '../../../utils/meta'
import { debugConsole } from '@/utils/debugging'
const hasTextEncoder = typeof TextEncoder !== 'undefined'
if (!hasTextEncoder) {
debugConsole.warn('TextEncoder is not available. Disabling pdf-caching.')
}
const isOpera =
Array.isArray(navigator.userAgentData?.brands) &&
navigator.userAgentData.brands.some(b => b.brand === 'Opera')
if (isOpera) {
debugConsole.warn('Browser cache is limited in Opera. Disabling pdf-caching.')
}
function isFlagEnabled(flag) {
if (!hasTextEncoder) return false
if (isOpera) return false
return getMeta('ol-splitTestVariants')?.[flag] === 'enabled'
}
export const cachedUrlLookupEnabled = isFlagEnabled(
'pdf-caching-cached-url-lookup'
)
export const prefetchingEnabled = isFlagEnabled('pdf-caching-prefetching')
export const prefetchLargeEnabled = isFlagEnabled('pdf-caching-prefetch-large')
export const enablePdfCaching = isFlagEnabled('pdf-caching-mode')
export const trackPdfDownloadEnabled = isFlagEnabled('track-pdf-download')
export const useClsiCache = isFlagEnabled('fall-back-to-clsi-cache')
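
These flags resolve against split-test variants that the server renders into the page. A sketch of the assumed meta payload and its effect:

// <meta name="ol-splitTestVariants" data-type="json"
//   content='{"pdf-caching-mode":"enabled","track-pdf-download":"default"}'>
// With that payload (and TextEncoder available, not Opera):
//   enablePdfCaching === true
//   trackPdfDownloadEnabled === false (any value other than 'enabled' disables)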

View File

@@ -0,0 +1,267 @@
import OError from '@overleaf/o-error'
import { fallbackRequest, fetchRange } from './pdf-caching'
import { captureException } from '@/infrastructure/error-reporter'
import { EDITOR_SESSION_ID, getPdfCachingMetrics } from './metrics'
import {
cachedUrlLookupEnabled,
enablePdfCaching,
prefetchingEnabled,
prefetchLargeEnabled,
trackPdfDownloadEnabled,
useClsiCache,
} from './pdf-caching-flags'
import { isNetworkError } from '@/utils/is-network-error'
import { debugConsole } from '@/utils/debugging'
import { PDFJS } from './pdf-js'
// 30 seconds: the shutdown grace period of a preemptible clsi instance.
const STALE_OUTPUT_REQUEST_THRESHOLD_MS = 30 * 1000
export function generatePdfCachingTransportFactory() {
// NOTE: The custom transport can be used for tracking download volume.
if (!enablePdfCaching && !trackPdfDownloadEnabled) {
return () => undefined
}
const usageScore = new Map()
const cachedUrls = new Map()
const metrics = Object.assign(getPdfCachingMetrics(), {
failedCount: 0,
failedOnce: false,
tooMuchBandwidthCount: 0,
tooManyRequestsCount: 0,
cachedCount: 0,
cachedBytes: 0,
fetchedCount: 0,
fetchedBytes: 0,
latencyComputeMax: 0,
latencyComputeTotal: 0,
requestedCount: 0,
requestedBytes: 0,
oldUrlHitCount: 0,
oldUrlMissCount: 0,
enablePdfCaching,
prefetchingEnabled,
prefetchLargeEnabled,
cachedUrlLookupEnabled,
})
const verifyChunks =
new URLSearchParams(window.location.search).get('verify_chunks') === 'true'
class PDFDataRangeTransport extends PDFJS.PDFDataRangeTransport {
constructor({ url, pdfFile, abortController, handleFetchError }) {
super(pdfFile.size, new Uint8Array())
this.url = url
pdfFile.ranges = pdfFile.ranges || []
pdfFile.editorId = pdfFile.editorId || EDITOR_SESSION_ID
this.pdfFile = pdfFile
// Clone the chunks as the objectId field is encoded to a Uint8Array.
this.leanPdfRanges = pdfFile.ranges.map(r => Object.assign({}, r))
this.handleFetchError = handleFetchError
this.abortController = abortController
this.startTime = performance.now()
const params = new URL(url).searchParams
// drop params that are not needed for chunk requests
params.delete('enable_pdf_caching')
params.delete('verify_chunks')
this.queryForChunks = params.toString()
}
abort() {
this.abortController.abort()
}
requestDataRange(start, end) {
const abortSignal = this.abortController.signal
const getDebugInfo = () => ({
// Sentry does not serialize objects in twice nested objects.
// Move the ranges to the root level to see them in Sentry.
pdfRanges: this.leanPdfRanges,
pdfFile: Object.assign({}, this.pdfFile, {
ranges: '[extracted]',
// Hide prefetched chunks as these include binary blobs.
prefetched: this.pdfFile.prefetched?.length,
}),
pdfUrl: this.url,
start,
end,
metrics,
})
const isStaleOutputRequest = () =>
performance.now() - this.startTime > STALE_OUTPUT_REQUEST_THRESHOLD_MS
const is404 = err => OError.getFullInfo(err).statusCode === 404
const isFromOutputPDFRequest = err =>
OError.getFullInfo(err).url?.includes?.('/output.pdf') === true
// Do not consider "expected 404s" and network errors as pdf caching
// failures.
// "expected 404s" here include:
// - any stale download request
// Example: The user returns to a browser tab after 1h and scrolls.
// - requests for the main output.pdf file
// A fallback request would not be able to retrieve the PDF either.
const isExpectedError = err =>
(is404(err) || isNetworkError(err)) &&
(isStaleOutputRequest() || isFromOutputPDFRequest(err))
const usesCache = url => {
if (!url) return false
const u = new URL(url)
return (
u.pathname.endsWith(
`build/${this.pdfFile.editorId}-${this.pdfFile.build}/output/output.pdf`
) && u.searchParams.get('clsiserverid') === 'cache'
)
}
const canTryFromCache = err => {
if (!useClsiCache) return false
if (!is404(err)) return false
return !usesCache(OError.getFullInfo(err).url)
}
const getOutputPDFURLFromCache = () => {
if (usesCache(this.url)) return this.url
const u = new URL(this.url)
u.searchParams.set('clsiserverid', 'cache')
u.pathname = u.pathname.replace(
/build\/[a-f0-9-]+\//,
`build/${this.pdfFile.editorId}-${this.pdfFile.build}/`
)
return u.href
}
const fetchFromCache = async () => {
// Try fetching the chunk from clsi-cache
const url = getOutputPDFURLFromCache()
return fallbackRequest({
file: this.pdfFile,
url,
start,
end,
abortSignal,
})
.then(blob => {
// Send the next output.pdf request directly to the cache.
this.url = url
// Only try downloading chunks that were cached previously
this.pdfFile.ranges = this.pdfFile.ranges.filter(r =>
cachedUrls.has(r.hash)
)
return blob
})
.catch(err => {
throw OError.tag(
new PDFJS.MissingPDFException(),
'cache-fallback',
{
statusCode: OError.getFullInfo(err).statusCode,
url: OError.getFullInfo(err).url,
err,
}
)
})
}
fetchRange({
url: this.url,
start,
end,
file: this.pdfFile,
queryForChunks: this.queryForChunks,
metrics,
usageScore,
cachedUrls,
verifyChunks,
prefetchingEnabled,
prefetchLargeEnabled,
cachedUrlLookupEnabled,
abortSignal,
canTryFromCache,
fallbackToCacheURL: getOutputPDFURLFromCache(),
})
.catch(err => {
if (abortSignal.aborted) return
if (canTryFromCache(err)) return fetchFromCache()
if (isExpectedError(err)) {
if (is404(err)) {
// A regular pdf-js request would have seen this 404 as well.
} else {
// Flaky network, switch back to regular pdf-js requests.
metrics.failedCount++
metrics.failedOnce = true
}
throw OError.tag(new PDFJS.MissingPDFException(), 'caching', {
statusCode: OError.getFullInfo(err).statusCode,
url: OError.getFullInfo(err).url,
err,
})
}
metrics.failedCount++
metrics.failedOnce = true
if (!enablePdfCaching) {
throw err // This was a fallback request already. Do not retry.
}
err = OError.tag(err, 'optimized pdf download error', getDebugInfo())
debugConsole.error(err)
captureException(err, {
tags: {
fromPdfCaching: true,
isFromOutputPDFRequest: isFromOutputPDFRequest(err),
},
})
return fallbackRequest({
file: this.pdfFile,
url: this.url,
start,
end,
abortSignal,
}).catch(err => {
if (canTryFromCache(err)) return fetchFromCache()
if (isExpectedError(err)) {
throw OError.tag(new PDFJS.MissingPDFException(), 'fallback', {
statusCode: OError.getFullInfo(err).statusCode,
url: OError.getFullInfo(err).url,
err,
})
}
throw err
})
})
.then(blob => {
if (abortSignal.aborted) return
this.onDataRange(start, blob)
})
.catch(err => {
if (abortSignal.aborted) return
err = OError.tag(err, 'fatal pdf download error', getDebugInfo())
debugConsole.error(err)
if (!(err instanceof PDFJS.MissingPDFException)) {
captureException(err, {
tags: {
fromPdfCaching: true,
isFromOutputPDFRequest: isFromOutputPDFRequest(err),
},
})
}
// Signal error for (subsequent) page load.
this.handleFetchError(err)
})
}
}
return function ({ url, pdfFile, abortController, handleFetchError }) {
if (metrics.failedOnce) {
// Disable pdf caching once any fetch request has failed.
// Be trigger-happy here until the feature reaches a stable state.
return undefined
}
// Latency is collected per preview cycle.
metrics.latencyComputeMax = 0
metrics.latencyComputeTotal = 0
return new PDFDataRangeTransport({
url,
pdfFile,
abortController,
handleFetchError,
})
}
}
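
A sketch of handing the transport to pdf.js, matching what PDFJSWrapper.loadDocument does below; pdfUrl and pdfFile are the output.pdf URL and file entry from the compile response:

const transportFactory = generatePdfCachingTransportFactory()
const abortController = new AbortController()
const range = transportFactory({
  url: pdfUrl,
  pdfFile,
  abortController,
  handleFetchError: err => console.warn('pdf range fetch failed', err),
})
// `range` is undefined when caching and download tracking are both disabled,
// in which case pdf.js falls back to downloading the file itself
const doc = await PDFJS.getDocument({ url: pdfUrl, range }).promise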

File diff suppressed because it is too large

View File

@@ -0,0 +1,206 @@
import { captureException } from '@/infrastructure/error-reporter'
import { generatePdfCachingTransportFactory } from './pdf-caching-transport'
import { PDFJS, loadPdfDocumentFromUrl, imageResourcesPath } from './pdf-js'
import {
PDFViewer,
EventBus,
PDFLinkService,
LinkTarget,
} from 'pdfjs-dist/web/pdf_viewer.mjs'
import 'pdfjs-dist/web/pdf_viewer.css'
import browser from '@/features/source-editor/extensions/browser'
const DEFAULT_RANGE_CHUNK_SIZE = 128 * 1024 // 128K chunks
export default class PDFJSWrapper {
public readonly viewer: PDFViewer
public readonly eventBus: EventBus
private readonly linkService: PDFLinkService
private readonly pdfCachingTransportFactory: any
private url?: string
// eslint-disable-next-line no-useless-constructor
constructor(public container: HTMLDivElement) {
// create the event bus
this.eventBus = new EventBus()
// create the link service
this.linkService = new PDFLinkService({
eventBus: this.eventBus,
externalLinkTarget: LinkTarget.BLANK,
externalLinkRel: 'noopener',
})
// create the viewer
this.viewer = new PDFViewer({
container: this.container,
eventBus: this.eventBus,
imageResourcesPath,
linkService: this.linkService,
maxCanvasPixels: browser.safari ? 4096 * 4096 : 8192 * 8192, // default is 4096 * 4096, increased for better resolution at high zoom levels (but not in Safari, which struggles with large canvases)
annotationMode: PDFJS.AnnotationMode.ENABLE, // enable annotations but not forms
annotationEditorMode: PDFJS.AnnotationEditorType.DISABLE, // disable annotation editing
})
this.linkService.setViewer(this.viewer)
this.pdfCachingTransportFactory = generatePdfCachingTransportFactory()
}
// load a document from a URL
async loadDocument({
url,
pdfFile,
abortController,
handleFetchError,
}: {
url: string
pdfFile: Record<string, any>
abortController: AbortController
handleFetchError: (error: Error) => void
}) {
this.url = url
const rangeTransport = this.pdfCachingTransportFactory({
url,
pdfFile,
abortController,
handleFetchError,
})
let rangeChunkSize = DEFAULT_RANGE_CHUNK_SIZE
if (rangeTransport && pdfFile.size < 2 * DEFAULT_RANGE_CHUNK_SIZE) {
// pdf.js disables the "bulk" download optimization when providing a
// custom range transport. Restore it by bumping the chunk size.
rangeChunkSize = pdfFile.size
}
try {
const doc = await loadPdfDocumentFromUrl(url, {
rangeChunkSize,
range: rangeTransport,
}).promise
// check that this is still the current URL
if (url !== this.url) {
return
}
this.viewer.setDocument(doc)
this.linkService.setDocument(doc)
return doc
} catch (error: any) {
if (!error || error.name !== 'MissingPDFException') {
captureException(error, {
tags: { handler: 'pdf-preview' },
})
}
throw error
}
}
async fetchAllData() {
await this.viewer.pdfDocument?.getData()
}
// update the current scale value if the container size changes
updateOnResize() {
if (!this.isVisible()) {
return
}
// Use requestAnimationFrame to prevent errors like "ResizeObserver loop
// completed with undelivered notifications" that can occur if updating the
// viewer causes another repaint. The cost of this is that the viewer update
// lags one frame behind, but it's unlikely to matter.
// Further reading: https://github.com/WICG/resize-observer/issues/38
window.requestAnimationFrame(() => {
const currentScaleValue = this.viewer.currentScaleValue
if (
currentScaleValue === 'auto' ||
currentScaleValue === 'page-fit' ||
currentScaleValue === 'page-height' ||
currentScaleValue === 'page-width'
) {
this.viewer.currentScaleValue = currentScaleValue
}
this.viewer.update()
})
}
// get the page and offset of a click event
clickPosition(event: MouseEvent, canvas: HTMLCanvasElement, page: number) {
if (!canvas) {
return
}
const { viewport } = this.viewer.getPageView(page)
const pageRect = canvas.getBoundingClientRect()
const dx = event.clientX - pageRect.left
const dy = event.clientY - pageRect.top
const [left, top] = viewport.convertToPdfPoint(dx, dy)
return {
page,
offset: {
left,
top: viewport.viewBox[3] - top,
},
}
}
// get the current page, offset and page size
get currentPosition() {
const pageIndex = this.viewer.currentPageNumber - 1
const pageView = this.viewer.getPageView(pageIndex)
const pageRect = pageView.div.getBoundingClientRect()
const containerRect = this.container.getBoundingClientRect()
const dy = containerRect.top - pageRect.top
const dx = containerRect.left - pageRect.left
const [left, top] = pageView.viewport.convertToPdfPoint(dx, dy)
const [, , width, height] = pageView.viewport.viewBox
return {
page: pageIndex,
offset: { top, left },
pageSize: { height, width },
}
}
scrollToPosition(position: Record<string, any>, scale = null) {
const destArray = [
null,
{
name: 'XYZ', // 'XYZ' = scroll to the given coordinates
},
position.offset.left,
position.offset.top,
scale,
]
this.viewer.scrollPageIntoView({
pageNumber: position.page + 1,
destArray,
})
// scroll the page left and down by an extra few pixels to account for the pdf.js viewer page border
const pageIndex = this.viewer.currentPageNumber - 1
const pageView = this.viewer.getPageView(pageIndex)
const offset = parseFloat(getComputedStyle(pageView.div).borderWidth)
this.viewer.container.scrollBy({
top: -offset,
left: -offset,
})
}
isVisible() {
return this.viewer.container.offsetParent !== null
}
}
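
A usage sketch; the container selector and pdfFile are assumptions, and 'pagesinit' is a standard pdf.js viewer event:

const container = document.querySelector('.pdf-viewer') as HTMLDivElement
const wrapper = new PDFJSWrapper(container)
wrapper.eventBus.on('pagesinit', () => {
  wrapper.viewer.currentScaleValue = 'page-width'
})
await wrapper.loadDocument({
  url: pdfFile.pdfUrl,
  pdfFile,
  abortController: new AbortController(),
  handleFetchError: err => console.warn(err),
})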

View File

@@ -0,0 +1,33 @@
import * as PDFJS from 'pdfjs-dist'
import type { DocumentInitParameters } from 'pdfjs-dist/types/src/display/api'
export { PDFJS }
PDFJS.GlobalWorkerOptions.workerPort = new Worker(
/* webpackChunkName: "pdf-worker" */
new URL('pdfjs-dist/build/pdf.worker.mjs', import.meta.url) // NOTE: .mjs extension
)
export const imageResourcesPath = '/images/pdfjs-dist/'
const cMapUrl = '/js/pdfjs-dist/cmaps/'
const standardFontDataUrl = '/fonts/pdfjs-dist/'
const params = new URLSearchParams(window.location.search)
const disableFontFace = params.get('disable-font-face') === 'true'
const disableStream = process.env.NODE_ENV !== 'test'
export const loadPdfDocumentFromUrl = (
url: string,
options: Partial<DocumentInitParameters> = {}
) =>
PDFJS.getDocument({
url,
cMapUrl,
standardFontDataUrl,
disableFontFace,
disableAutoFetch: true, // only fetch the data needed for the displayed pages
disableStream,
isEvalSupported: false,
enableXfa: false, // default is false (2021-10-12), but set explicitly to be sure
...options,
})
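
Callers can override any of the DocumentInitParameters; a minimal sketch (the URL is a placeholder):

const doc = await loadPdfDocumentFromUrl('/build/b1/output/output.pdf', {
  rangeChunkSize: 64 * 1024, // fetch in smaller chunks than the 128K default
}).promise
console.log('loaded', doc.numPages, 'pages')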

View File

@@ -0,0 +1,42 @@
import React from 'react'
import { CompileOutputFile } from '../../../../../types/compile'
export type LogEntry = {
raw: string
level: ErrorLevel
key: string
file?: string
column?: number
line?: number
ruleId?: string
message?: string
content?: string
type?: string
messageComponent?: React.ReactNode
contentDetails?: string[]
}
export type ErrorLevel =
| 'error'
| 'warning'
| 'info'
| 'typesetting'
| 'raw'
| 'success'
export type SourceLocation = {
file?: string
// `line` should be either a number or null (i.e. not required), but we
// currently sometimes get an empty string (from BibTeX errors).
line?: number | string | null
column?: number
}
export type PdfFileData = CompileOutputFile
type PdfFileArchiveData = CompileOutputFile & { fileCount: number }
export type PdfFileDataList = {
top: PdfFileData[]
other: PdfFileData[]
archive?: PdfFileArchiveData
}
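
For illustration, a LogEntry as handleLogFiles above would produce it (values made up):

const entry: LogEntry = {
  raw: '! Undefined control sequence.',
  level: 'error',
  key: 'compile-log-entry-1',
  file: 'main.tex',
  line: 12,
  message: 'Undefined control sequence.',
}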