first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,233 @@
import { isMainFile } from './editor-files'
import getMeta from '../../../utils/meta'
import { deleteJSON, postJSON } from '../../../infrastructure/fetch-json'
import { debounce } from 'lodash'
import { EDITOR_SESSION_ID, trackPdfDownload } from './metrics'
import { enablePdfCaching } from './pdf-caching-flags'
import { debugConsole } from '@/utils/debugging'
import { signalWithTimeout } from '@/utils/abort-signal'
const AUTO_COMPILE_MAX_WAIT = 5000
// We add a 2 second debounce to sending user changes to the server when the
// user isn't collaborating with anyone. This value needs to be higher than
// SINGLE_USER_FLUSH_DELAY, and allow for client-to-server latency; otherwise
// we compile before the op reaches the server, and then again on its ack.
const AUTO_COMPILE_DEBOUNCE = 2500
// If there is a pending op, wait for it to be saved before compiling
const PENDING_OP_MAX_WAIT = 10000
const searchParams = new URLSearchParams(window.location.search)
export default class DocumentCompiler {
constructor({
compilingRef,
projectId,
setChangedAt,
setCompiling,
setData,
setFirstRenderDone,
setDeliveryLatencies,
setError,
cleanupCompileResult,
signal,
openDocs,
}) {
this.compilingRef = compilingRef
this.projectId = projectId
this.setChangedAt = setChangedAt
this.setCompiling = setCompiling
this.setData = setData
this.setFirstRenderDone = setFirstRenderDone
this.setDeliveryLatencies = setDeliveryLatencies
this.setError = setError
this.cleanupCompileResult = cleanupCompileResult
this.signal = signal
this.openDocs = openDocs
this.projectRootDocId = null
this.clsiServerId = null
this.currentDoc = null
this.error = undefined
this.timer = 0
this.defaultOptions = {
draft: false,
stopOnFirstError: false,
}
this.debouncedAutoCompile = debounce(
() => {
this.compile({ isAutoCompileOnChange: true })
},
AUTO_COMPILE_DEBOUNCE,
{
maxWait: AUTO_COMPILE_MAX_WAIT,
}
)
}
// The main "compile" function.
// Call this directly to run a compile now, otherwise call debouncedAutoCompile.
async compile(options = {}) {
options = { ...this.defaultOptions, ...options }
if (options.isAutoCompileOnLoad && getMeta('ol-preventCompileOnLoad')) {
return
}
// set "compiling" to true (in the React component's state), and return if it was already true
const wasCompiling = this.compilingRef.current
this.setCompiling(true)
if (wasCompiling) {
if (options.isAutoCompileOnChange) {
this.debouncedAutoCompile()
}
return
}
try {
await this.openDocs.awaitBufferedOps(
signalWithTimeout(this.signal, PENDING_OP_MAX_WAIT)
)
// reset values
this.setChangedAt(0) // TODO: wait for doc:saved?
this.validationIssues = undefined
const params = this.buildCompileParams(options)
const t0 = performance.now()
const rootDocId = this.getRootDocOverrideId()
const body = {
rootDoc_id: rootDocId,
draft: options.draft,
check: 'silent', // NOTE: 'error' and 'validate' are possible, but unused
// use incremental compile for all users but revert to a full compile
// if there was previously a server error
incrementalCompilesEnabled: !this.error,
stopOnFirstError: options.stopOnFirstError,
editorId: EDITOR_SESSION_ID,
}
const data = await postJSON(
`/project/${this.projectId}/compile?${params}`,
{ body, signal: this.signal }
)
const compileTimeClientE2E = Math.ceil(performance.now() - t0)
const { deliveryLatencies, firstRenderDone } = trackPdfDownload(
data,
compileTimeClientE2E,
t0
)
this.setDeliveryLatencies(() => deliveryLatencies)
this.setFirstRenderDone(() => firstRenderDone)
// unset the error before it's set again later, so that components are recreated and events are tracked
this.setError(undefined)
data.options = options
data.rootDocId = rootDocId
if (data.clsiServerId) {
this.clsiServerId = data.clsiServerId
}
this.setData(data)
} catch (error) {
debugConsole.error(error)
this.cleanupCompileResult()
this.setError(error.info?.statusCode === 429 ? 'rate-limited' : 'error')
} finally {
this.setCompiling(false)
}
}
// parse the text of the current doc in the editor
// if it contains "\documentclass" then use this as the root doc
getRootDocOverrideId() {
// only override when not in the root doc itself
if (this.currentDoc && this.currentDoc.doc_id !== this.projectRootDocId) {
const snapshot = this.currentDoc.getSnapshot()
if (snapshot && isMainFile(snapshot)) {
return this.currentDoc.doc_id
}
}
return null
}
// build the query parameters added to post-compile requests
buildPostCompileParams() {
const params = new URLSearchParams()
// the id of the CLSI server that processed the previous compile request
if (this.clsiServerId) {
params.set('clsiserverid', this.clsiServerId)
}
return params
}
// build the query parameters for the compile request
buildCompileParams(options) {
const params = new URLSearchParams()
// note: no clsiserverid query param is set on "compile" requests,
// as this is added in the backend by the web api
// tell the server whether this is an automatic or manual compile request
if (options.isAutoCompileOnLoad || options.isAutoCompileOnChange) {
params.set('auto_compile', 'true')
}
// use the feature flag to enable PDF caching
if (enablePdfCaching) {
params.set('enable_pdf_caching', 'true')
}
// use the feature flag to enable "file line errors"
if (searchParams.get('file_line_errors') === 'true') {
params.set('file_line_errors', 'true')
}
return params
}
// send a request to stop the current compile
stopCompile() {
// NOTE: no stoppingCompile state, as this should happen fairly quickly
// and doesn't matter if it runs twice.
const params = this.buildPostCompileParams()
return postJSON(`/project/${this.projectId}/compile/stop?${params}`, {
signal: this.signal,
})
.catch(error => {
debugConsole.error(error)
this.setError('error')
})
.finally(() => {
this.setCompiling(false)
})
}
// send a request to clear the cache
clearCache() {
const params = this.buildPostCompileParams()
return deleteJSON(`/project/${this.projectId}/output?${params}`, {
signal: this.signal,
}).catch(error => {
debugConsole.error(error)
this.setError('clear-cache')
})
}
setOption(option, value) {
this.defaultOptions[option] = value
}
}
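
A minimal wiring sketch for the class above. The inline callbacks, the `openDocs` stub and the `editorEvents` emitter are hypothetical placeholders; in the real app these come from the PDF preview React context and the editor.

const controller = new AbortController()
const compiler = new DocumentCompiler({
  compilingRef: { current: false },
  projectId: 'project-id', // placeholder
  setChangedAt: () => {},
  setCompiling: () => {},
  setData: data => console.log('compile result', data),
  setFirstRenderDone: () => {},
  setDeliveryLatencies: () => {},
  setError: error => console.warn('compile failed:', error),
  cleanupCompileResult: () => {},
  signal: controller.signal,
  openDocs: { awaitBufferedOps: async () => {} }, // stub: resolves immediately
})
compiler.compile() // manual compile, e.g. the "Recompile" button
// per-keystroke compiles are coalesced; lodash's maxWait still guarantees a
// compile at least every AUTO_COMPILE_MAX_WAIT ms during continuous typing
editorEvents.on('change', () => compiler.debouncedAutoCompile()) // hypothetical emitter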

View File

@@ -0,0 +1,4 @@
const documentClassRe = /^[^%]*\\documentclass/
export const isMainFile = doc =>
doc.split('\n').some(line => documentClassRe.test(line))
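
For illustration, how this guard behaves: the regex runs against each line and only matches a \documentclass that appears before any % comment marker on that line.

isMainFile('\\documentclass{article}\n\\begin{document}') // true
isMainFile('% \\documentclass{article}') // false: commented out
isMainFile('\\section{Introduction}') // false: not a root document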

View File

@@ -0,0 +1,88 @@
import {
CompileOutputFile,
CompileResponseData,
} from '../../../../../types/compile'
import { PdfFileDataList } from '@/features/pdf-preview/util/types'
const topFileTypes = ['bbl', 'gls', 'ind']
// NOTE: Updating this list requires a corresponding change in
// * services/clsi/app/js/OutputFileArchiveManager.js
const ignoreFiles = ['output.fls', 'output.fdb_latexmk']
export function buildFileList(
outputFiles: Map<string, CompileOutputFile>,
{
clsiServerId,
compileGroup,
outputFilesArchive,
fromCache = false,
}: CompileResponseData
): PdfFileDataList {
const files: PdfFileDataList = { top: [], other: [] }
if (outputFiles) {
const params = new URLSearchParams()
if (fromCache) {
params.set('clsiserverid', 'cache')
} else if (clsiServerId) {
params.set('clsiserverid', clsiServerId)
}
if (compileGroup) {
params.set('compileGroup', compileGroup)
}
const queryString = params.toString()
const allFiles = []
// filter out ignored files and set some properties
for (const file of outputFiles.values()) {
if (!ignoreFiles.includes(file.path)) {
file.main = file.path.startsWith('output.')
if (queryString.length) {
file.url += `?${queryString}`
}
allFiles.push(file)
}
}
// sort main files first, then alphabetical
allFiles.sort((a, b) => {
if (a.main && !b.main) {
return -1
}
if (b.main && !a.main) {
return 1
}
return a.path.localeCompare(b.path, undefined, { numeric: true })
})
// group files into "top" and "other"
for (const file of allFiles) {
if (topFileTypes.includes(file.type)) {
files.top.push(file)
} else if (!(file.type === 'pdf' && file.main === true)) {
files.other.push(file)
}
}
const archivableFiles = [...files.top, ...files.other]
if (outputFilesArchive && archivableFiles.length > 0) {
archivableFiles.forEach(file => params.append('files', file.path))
files.archive = {
...outputFilesArchive,
fileCount: archivableFiles.length,
url: `${outputFilesArchive.url}?${params.toString()}`,
}
}
}
return files
}
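
A sketch of the grouping behaviour with hand-rolled stubs (types relaxed with `as any` for brevity; real CompileOutputFile entries come from the compile response and carry more fields):

const outputFiles = new Map([
  ['output.pdf', { path: 'output.pdf', type: 'pdf', url: '/build/b1/output/output.pdf' }],
  ['output.bbl', { path: 'output.bbl', type: 'bbl', url: '/build/b1/output/output.bbl' }],
  ['output.fls', { path: 'output.fls', type: 'fls', url: '/build/b1/output/output.fls' }],
])
const files = buildFileList(outputFiles as any, { clsiServerId: 'clsi-1' } as any)
// each served url gains ?clsiserverid=clsi-1
// files.top   -> [output.bbl]  (bbl/gls/ind files are surfaced first)
// files.other -> []  (output.fls is ignored, the main PDF is linked elsewhere)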

View File

@@ -0,0 +1,35 @@
import { PDFJS } from '@/features/pdf-preview/util/pdf-js'
export function buildHighlightElement(highlight, viewer) {
const pageView = viewer.getPageView(highlight.page - 1)
const viewport = pageView.viewport
const height = viewport.viewBox[3]
const rect = viewport.convertToViewportRectangle([
highlight.h, // xMin
height - (highlight.v + highlight.height) + 10, // yMin
highlight.h + highlight.width, // xMax
height - highlight.v + 10, // yMax
])
const [left, top, right, bottom] = PDFJS.Util.normalizeRect(rect)
const element = document.createElement('div')
element.style.left = Math.floor(pageView.div.offsetLeft + left) + 'px'
element.style.top = Math.floor(pageView.div.offsetTop + top) + 'px'
element.style.width = Math.ceil(right - left) + 'px'
element.style.height = Math.ceil(bottom - top) + 'px'
element.style.backgroundColor = 'rgba(255,255,0)'
element.style.position = 'absolute'
element.style.display = 'inline-block'
element.style.scrollMargin = '72px'
element.style.pointerEvents = 'none'
element.style.opacity = '0'
element.style.transition = 'opacity 1s'
viewer.viewer?.append(element)
return element
}
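
A sketch of flashing the highlight after a SyncTeX forward search. The coordinates are PDF points as returned by the sync endpoint, and the timings are illustrative:

const element = buildHighlightElement(
  { page: 3, h: 72, v: 650, width: 200, height: 12 },
  pdfJsWrapper.viewer // the PDFViewer instance from PDFJSWrapper below
)
element.scrollIntoView({ block: 'center' })
// the element starts at opacity 0 with a 1s transition, so raising the
// opacity fades the highlight in; removing the node cleans up afterwards
requestAnimationFrame(() => { element.style.opacity = '0.5' })
setTimeout(() => element.remove(), 2000)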

View File

@@ -0,0 +1,66 @@
import { v4 as uuid } from 'uuid'
import { sendMB } from '../../../infrastructure/event-tracking'
import { trackPdfDownloadEnabled } from './pdf-caching-flags'
import { debugConsole } from '@/utils/debugging'
// VERSION should get incremented when making changes to caching behavior or
// adjusting metrics collection.
const VERSION = 9
// editing session id
export const EDITOR_SESSION_ID = uuid()
const pdfCachingMetrics = {
viewerId: EDITOR_SESSION_ID,
}
export function getPdfCachingMetrics() {
return pdfCachingMetrics
}
export function trackPdfDownload(response, compileTimeClientE2E, t0) {
const { timings, pdfCachingMinChunkSize } = response
const deliveryLatencies = {
compileTimeClientE2E,
compileTimeServerE2E: timings?.compileE2E,
}
// There can be multiple "first" renderings when two PDF viewers are open,
// e.g. two detached PDF tabs, or the detacher tab plus a detached tab.
// Let the pdfCachingMetrics round trip to account for pdf-detach.
let isFirstRender = true
function firstRenderDone({ latencyFetch, latencyRender, pdfCachingMetrics }) {
if (!isFirstRender) return
isFirstRender = false
deliveryLatencies.totalDeliveryTime = Math.ceil(performance.now() - t0)
deliveryLatencies.latencyFetch = latencyFetch
if (latencyRender) {
deliveryLatencies.latencyRender = latencyRender
}
if (trackPdfDownloadEnabled) {
// Submit latency along with compile context.
submitCompileMetrics({
pdfCachingMinChunkSize,
...deliveryLatencies,
...pdfCachingMetrics,
})
}
}
return {
deliveryLatencies,
firstRenderDone,
}
}
function submitCompileMetrics(metrics) {
const leanMetrics = {
version: VERSION,
...metrics,
id: EDITOR_SESSION_ID,
}
debugConsole.log('/event/compile-metrics', JSON.stringify(leanMetrics))
sendMB('compile-metrics-v6', leanMetrics)
}
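
A sketch of the round trip, mirroring how DocumentCompiler.compile() uses these helpers (fetchCompileResponse is a hypothetical stand-in for the postJSON call):

const t0 = performance.now()
const data = await fetchCompileResponse() // hypothetical
const compileTimeClientE2E = Math.ceil(performance.now() - t0)
const { deliveryLatencies, firstRenderDone } = trackPdfDownload(
  data,
  compileTimeClientE2E,
  t0
)
// later, the PDF viewer reports its first successful render
firstRenderDone({
  latencyFetch: 120, // ms, illustrative
  latencyRender: 45, // ms, illustrative
  pdfCachingMetrics: getPdfCachingMetrics(),
})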

View File

@@ -0,0 +1,276 @@
import HumanReadableLogs from '../../../ide/human-readable-logs/HumanReadableLogs'
import BibLogParser from '../../../ide/log-parser/bib-log-parser'
import { enablePdfCaching } from './pdf-caching-flags'
import { debugConsole } from '@/utils/debugging'
import { dirname, findEntityByPath } from '@/features/file-tree/util/path'
import '@/utils/readable-stream-async-iterator-polyfill'
import { EDITOR_SESSION_ID } from '@/features/pdf-preview/util/metrics'
// Warnings that may disappear after a second LaTeX pass
const TRANSIENT_WARNING_REGEX = /^(Reference|Citation).+undefined on input line/
const MAX_LOG_SIZE = 1024 * 1024 // 1MB
const MAX_BIB_LOG_SIZE_PER_FILE = MAX_LOG_SIZE
export function handleOutputFiles(outputFiles, projectId, data) {
const outputFile = outputFiles.get('output.pdf')
if (!outputFile) return null
outputFile.editorId = outputFile.editorId || EDITOR_SESSION_ID
// build the URL for viewing the PDF in the preview UI
const params = new URLSearchParams()
if (data.compileGroup) {
params.set('compileGroup', data.compileGroup)
}
if (data.clsiServerId) {
params.set('clsiserverid', data.clsiServerId)
}
if (enablePdfCaching) {
// Tag traffic that uses the pdf caching logic.
params.set('enable_pdf_caching', 'true')
}
outputFile.pdfUrl = `${buildURL(
outputFile,
data.pdfDownloadDomain
)}?${params}`
if (data.fromCache) {
outputFile.pdfDownloadUrl = outputFile.downloadURL
} else {
// build the URL for downloading the PDF
params.set('popupDownload', 'true') // save PDF download as file
outputFile.pdfDownloadUrl = `/download/project/${projectId}/build/${outputFile.build}/output/output.pdf?${params}`
}
return outputFile
}
let nextEntryId = 1
function generateEntryKey() {
return 'compile-log-entry-' + nextEntryId++
}
export const handleLogFiles = async (outputFiles, data, signal) => {
const result = {
log: null,
logEntries: {
errors: [],
warnings: [],
typesetting: [],
},
}
function accumulateResults(newEntries, type) {
for (const key in result.logEntries) {
if (newEntries[key]) {
for (const entry of newEntries[key]) {
if (type) {
entry.type = type
}
if (entry.file) {
entry.file = normalizeFilePath(entry.file)
}
entry.key = generateEntryKey()
}
result.logEntries[key].push(...newEntries[key])
}
}
}
const logFile = outputFiles.get('output.log')
if (logFile) {
result.log = await fetchFileWithSizeLimit(
buildURL(logFile, data.pdfDownloadDomain),
signal,
MAX_LOG_SIZE
)
try {
let { errors, warnings, typesetting } = HumanReadableLogs.parse(
result.log,
{
ignoreDuplicates: true,
}
)
if (data.status === 'stopped-on-first-error') {
// Hide warnings that could disappear after a second pass
warnings = warnings.filter(warning => !isTransientWarning(warning))
}
accumulateResults({ errors, warnings, typesetting })
} catch (e) {
debugConsole.warn(e) // ignore failure to parse the log file, but log a warning
}
}
const blgFiles = []
for (const [filename, file] of outputFiles) {
if (filename.endsWith('.blg')) {
blgFiles.push(file)
}
}
for (const blgFile of blgFiles) {
const log = await fetchFileWithSizeLimit(
buildURL(blgFile, data.pdfDownloadDomain),
signal,
MAX_BIB_LOG_SIZE_PER_FILE
)
try {
const { errors, warnings } = new BibLogParser(log, {
maxErrors: 100,
}).parse()
accumulateResults({ errors, warnings }, 'BibTeX:')
} catch (e) {
// BibLog parsing errors are ignored
}
}
result.logEntries.all = [
...result.logEntries.errors,
...result.logEntries.warnings,
...result.logEntries.typesetting,
]
return result
}
export function buildLogEntryAnnotations(entries, fileTreeData, rootDocId) {
const rootDocDirname = dirname(fileTreeData, rootDocId)
const logEntryAnnotations = {}
const seenLine = {}
for (const entry of entries) {
if (entry.file) {
entry.file = normalizeFilePath(entry.file, rootDocDirname)
const entity = findEntityByPath(fileTreeData, entry.file)?.entity
if (entity) {
if (!(entity._id in logEntryAnnotations)) {
logEntryAnnotations[entity._id] = []
}
const annotation = {
id: entry.key,
entryIndex: logEntryAnnotations[entity._id].length, // used for maintaining the order of items on the same line
row: entry.line - 1,
type: entry.level === 'error' ? 'error' : 'warning',
text: entry.message,
source: 'compile', // NOTE: this is used in Ace for filtering the annotations
ruleId: entry.ruleId,
command: entry.command,
}
// set firstOnLine for the first non-typesetting annotation on a line
if (entry.level !== 'typesetting') {
if (!seenLine[entry.line]) {
annotation.firstOnLine = true
seenLine[entry.line] = true
}
}
logEntryAnnotations[entity._id].push(annotation)
}
}
}
return logEntryAnnotations
}
export const buildRuleCounts = (entries = []) => {
const counts = {}
for (const entry of entries) {
const key = `${entry.level}_${entry.ruleId}`
counts[key] = counts[key] ? counts[key] + 1 : 1
}
return counts
}
export const buildRuleDeltas = (ruleCounts, previousRuleCounts) => {
const counts = {}
// keys that are defined in the current log entries
for (const [key, value] of Object.entries(ruleCounts)) {
const previousValue = previousRuleCounts[key] ?? 0
counts[`delta_${key}`] = value - previousValue
}
// keys that are no longer defined in the current log entries
for (const [key, value] of Object.entries(previousRuleCounts)) {
if (!(key in ruleCounts)) {
counts[key] = 0
counts[`delta_${key}`] = -value
}
}
return counts
}
function buildURL(file, pdfDownloadDomain) {
if (file.build && pdfDownloadDomain) {
// Downloads from the compiles domain must include a build id.
// The build id is used implicitly for access control.
return `${pdfDownloadDomain}${file.url}`
}
// Go through web instead, which uses mongo for checking project access.
return `${window.origin}${file.url}`
}
function normalizeFilePath(path, rootDocDirname) {
path = path.replace(/\/\//g, '/')
path = path.replace(
/^.*\/compiles\/[0-9a-f]{24}(-[0-9a-f]{24})?\/(\.\/)?/,
''
)
path = path.replace(/^\/compile\//, '')
if (rootDocDirname) {
path = path.replace(/^\.\//, rootDocDirname + '/')
}
return path
}
function isTransientWarning(warning) {
return TRANSIENT_WARNING_REGEX.test(warning.message)
}
async function fetchFileWithSizeLimit(url, signal, maxSize) {
let result = ''
try {
const abortController = new AbortController()
// abort fetching the log file if the main signal is aborted
signal.addEventListener('abort', () => {
abortController.abort()
})
const response = await fetch(url, {
signal: abortController.signal,
})
if (!response.ok) {
throw new Error('Failed to fetch log file')
}
const reader = response.body.pipeThrough(new TextDecoderStream())
for await (const chunk of reader) {
result += chunk
if (result.length > maxSize) {
abortController.abort()
}
}
} catch (e) {
debugConsole.warn(e) // ignore failure to fetch the log file, but log a warning
}
return result
}
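
Putting the pieces together, a minimal sketch of consuming a compile response (treating data.outputFiles as an array is an assumption about the response shape):

const outputFiles = new Map(data.outputFiles.map(file => [file.path, file]))
const pdf = handleOutputFiles(outputFiles, projectId, data)
const { log, logEntries } = await handleLogFiles(outputFiles, data, signal)
console.log(pdf?.pdfUrl)
console.log(`${logEntries.errors.length} errors, ${logEntries.warnings.length} warnings`)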

View File

@@ -0,0 +1,29 @@
import getMeta from '../../../utils/meta'
import { debugConsole } from '@/utils/debugging'
const hasTextEncoder = typeof TextEncoder !== 'undefined'
if (!hasTextEncoder) {
debugConsole.warn('TextEncoder is not available. Disabling pdf-caching.')
}
const isOpera =
Array.isArray(navigator.userAgentData?.brands) &&
navigator.userAgentData.brands.some(b => b.brand === 'Opera')
if (isOpera) {
debugConsole.warn('Browser cache is limited in Opera. Disabling pdf-caching.')
}
function isFlagEnabled(flag) {
if (!hasTextEncoder) return false
if (isOpera) return false
return getMeta('ol-splitTestVariants')?.[flag] === 'enabled'
}
export const cachedUrlLookupEnabled = isFlagEnabled(
'pdf-caching-cached-url-lookup'
)
export const prefetchingEnabled = isFlagEnabled('pdf-caching-prefetching')
export const prefetchLargeEnabled = isFlagEnabled('pdf-caching-prefetch-large')
export const enablePdfCaching = isFlagEnabled('pdf-caching-mode')
export const trackPdfDownloadEnabled = isFlagEnabled('track-pdf-download')
export const useClsiCache = isFlagEnabled('fall-back-to-clsi-cache')
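
These flags resolve against split-test variants that the server renders into the page. A sketch of the assumed meta payload and its effect:

// <meta name="ol-splitTestVariants" data-type="json"
//   content='{"pdf-caching-mode":"enabled","track-pdf-download":"default"}'>
// With that payload (and TextEncoder available, not Opera):
//   enablePdfCaching === true
//   trackPdfDownloadEnabled === false (any value other than 'enabled' disables)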

View File

@@ -0,0 +1,267 @@
import OError from '@overleaf/o-error'
import { fallbackRequest, fetchRange } from './pdf-caching'
import { captureException } from '@/infrastructure/error-reporter'
import { EDITOR_SESSION_ID, getPdfCachingMetrics } from './metrics'
import {
cachedUrlLookupEnabled,
enablePdfCaching,
prefetchingEnabled,
prefetchLargeEnabled,
trackPdfDownloadEnabled,
useClsiCache,
} from './pdf-caching-flags'
import { isNetworkError } from '@/utils/is-network-error'
import { debugConsole } from '@/utils/debugging'
import { PDFJS } from './pdf-js'
// 30 seconds: the shutdown grace period of a preemptible clsi instance.
const STALE_OUTPUT_REQUEST_THRESHOLD_MS = 30 * 1000
export function generatePdfCachingTransportFactory() {
// NOTE: The custom transport can be used for tracking download volume.
if (!enablePdfCaching && !trackPdfDownloadEnabled) {
return () => undefined
}
const usageScore = new Map()
const cachedUrls = new Map()
const metrics = Object.assign(getPdfCachingMetrics(), {
failedCount: 0,
failedOnce: false,
tooMuchBandwidthCount: 0,
tooManyRequestsCount: 0,
cachedCount: 0,
cachedBytes: 0,
fetchedCount: 0,
fetchedBytes: 0,
latencyComputeMax: 0,
latencyComputeTotal: 0,
requestedCount: 0,
requestedBytes: 0,
oldUrlHitCount: 0,
oldUrlMissCount: 0,
enablePdfCaching,
prefetchingEnabled,
prefetchLargeEnabled,
cachedUrlLookupEnabled,
})
const verifyChunks =
new URLSearchParams(window.location.search).get('verify_chunks') === 'true'
class PDFDataRangeTransport extends PDFJS.PDFDataRangeTransport {
constructor({ url, pdfFile, abortController, handleFetchError }) {
super(pdfFile.size, new Uint8Array())
this.url = url
pdfFile.ranges = pdfFile.ranges || []
pdfFile.editorId = pdfFile.editorId || EDITOR_SESSION_ID
this.pdfFile = pdfFile
// Clone the chunks as the objectId field is encoded to a Uint8Array.
this.leanPdfRanges = pdfFile.ranges.map(r => Object.assign({}, r))
this.handleFetchError = handleFetchError
this.abortController = abortController
this.startTime = performance.now()
const params = new URL(url).searchParams
// drop params that are not needed for chunk requests
params.delete('enable_pdf_caching')
params.delete('verify_chunks')
this.queryForChunks = params.toString()
}
abort() {
this.abortController.abort()
}
requestDataRange(start, end) {
const abortSignal = this.abortController.signal
const getDebugInfo = () => ({
// Sentry does not serialize objects in twice nested objects.
// Move the ranges to the root level to see them in Sentry.
pdfRanges: this.leanPdfRanges,
pdfFile: Object.assign({}, this.pdfFile, {
ranges: '[extracted]',
// Hide prefetched chunks as these include binary blobs.
prefetched: this.pdfFile.prefetched?.length,
}),
pdfUrl: this.url,
start,
end,
metrics,
})
const isStaleOutputRequest = () =>
performance.now() - this.startTime > STALE_OUTPUT_REQUEST_THRESHOLD_MS
const is404 = err => OError.getFullInfo(err).statusCode === 404
const isFromOutputPDFRequest = err =>
OError.getFullInfo(err).url?.includes?.('/output.pdf') === true
// Do not consider "expected 404s" and network errors as pdf caching
// failures.
// "expected 404s" here include:
// - any stale download request
// Example: The user returns to a browser tab after 1h and scrolls.
// - requests for the main output.pdf file
// A fallback request would not be able to retrieve the PDF either.
const isExpectedError = err =>
(is404(err) || isNetworkError(err)) &&
(isStaleOutputRequest() || isFromOutputPDFRequest(err))
const usesCache = url => {
if (!url) return false
const u = new URL(url)
return (
u.pathname.endsWith(
`build/${this.pdfFile.editorId}-${this.pdfFile.build}/output/output.pdf`
) && u.searchParams.get('clsiserverid') === 'cache'
)
}
const canTryFromCache = err => {
if (!useClsiCache) return false
if (!is404(err)) return false
return !usesCache(OError.getFullInfo(err).url)
}
const getOutputPDFURLFromCache = () => {
if (usesCache(this.url)) return this.url
const u = new URL(this.url)
u.searchParams.set('clsiserverid', 'cache')
u.pathname = u.pathname.replace(
/build\/[a-f0-9-]+\//,
`build/${this.pdfFile.editorId}-${this.pdfFile.build}/`
)
return u.href
}
const fetchFromCache = async () => {
// Try fetching the chunk from clsi-cache
const url = getOutputPDFURLFromCache()
return fallbackRequest({
file: this.pdfFile,
url,
start,
end,
abortSignal,
})
.then(blob => {
// Send the next output.pdf request directly to the cache.
this.url = url
// Only try downloading chunks that were cached previously
this.pdfFile.ranges = this.pdfFile.ranges.filter(r =>
cachedUrls.has(r.hash)
)
return blob
})
.catch(err => {
throw OError.tag(
new PDFJS.MissingPDFException(),
'cache-fallback',
{
statusCode: OError.getFullInfo(err).statusCode,
url: OError.getFullInfo(err).url,
err,
}
)
})
}
fetchRange({
url: this.url,
start,
end,
file: this.pdfFile,
queryForChunks: this.queryForChunks,
metrics,
usageScore,
cachedUrls,
verifyChunks,
prefetchingEnabled,
prefetchLargeEnabled,
cachedUrlLookupEnabled,
abortSignal,
canTryFromCache,
fallbackToCacheURL: getOutputPDFURLFromCache(),
})
.catch(err => {
if (abortSignal.aborted) return
if (canTryFromCache(err)) return fetchFromCache()
if (isExpectedError(err)) {
if (is404(err)) {
// A regular pdf-js request would have seen this 404 as well.
} else {
// Flaky network, switch back to regular pdf-js requests.
metrics.failedCount++
metrics.failedOnce = true
}
throw OError.tag(new PDFJS.MissingPDFException(), 'caching', {
statusCode: OError.getFullInfo(err).statusCode,
url: OError.getFullInfo(err).url,
err,
})
}
metrics.failedCount++
metrics.failedOnce = true
if (!enablePdfCaching) {
throw err // This was a fallback request already. Do not retry.
}
err = OError.tag(err, 'optimized pdf download error', getDebugInfo())
debugConsole.error(err)
captureException(err, {
tags: {
fromPdfCaching: true,
isFromOutputPDFRequest: isFromOutputPDFRequest(err),
},
})
return fallbackRequest({
file: this.pdfFile,
url: this.url,
start,
end,
abortSignal,
}).catch(err => {
if (canTryFromCache(err)) return fetchFromCache()
if (isExpectedError(err)) {
throw OError.tag(new PDFJS.MissingPDFException(), 'fallback', {
statusCode: OError.getFullInfo(err).statusCode,
url: OError.getFullInfo(err).url,
err,
})
}
throw err
})
})
.then(blob => {
if (abortSignal.aborted) return
this.onDataRange(start, blob)
})
.catch(err => {
if (abortSignal.aborted) return
err = OError.tag(err, 'fatal pdf download error', getDebugInfo())
debugConsole.error(err)
if (!(err instanceof PDFJS.MissingPDFException)) {
captureException(err, {
tags: {
fromPdfCaching: true,
isFromOutputPDFRequest: isFromOutputPDFRequest(err),
},
})
}
// Signal error for (subsequent) page load.
this.handleFetchError(err)
})
}
}
return function ({ url, pdfFile, abortController, handleFetchError }) {
if (metrics.failedOnce) {
// Disable pdf caching once any fetch request has failed.
// Be trigger-happy here until the feature reaches a stable state.
return undefined
}
// Latency is collected per preview cycle.
metrics.latencyComputeMax = 0
metrics.latencyComputeTotal = 0
return new PDFDataRangeTransport({
url,
pdfFile,
abortController,
handleFetchError,
})
}
}
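
A sketch of handing the transport to pdf.js, matching what PDFJSWrapper.loadDocument does below; pdfUrl and pdfFile are the output.pdf URL and file entry from the compile response:

const transportFactory = generatePdfCachingTransportFactory()
const abortController = new AbortController()
const range = transportFactory({
  url: pdfUrl,
  pdfFile,
  abortController,
  handleFetchError: err => console.warn('pdf range fetch failed', err),
})
// `range` is undefined when caching and download tracking are both disabled,
// in which case pdf.js falls back to downloading the file itself
const doc = await PDFJS.getDocument({ url: pdfUrl, range }).promise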

File diff suppressed because it is too large

View File

@@ -0,0 +1,206 @@
import { captureException } from '@/infrastructure/error-reporter'
import { generatePdfCachingTransportFactory } from './pdf-caching-transport'
import { PDFJS, loadPdfDocumentFromUrl, imageResourcesPath } from './pdf-js'
import {
PDFViewer,
EventBus,
PDFLinkService,
LinkTarget,
} from 'pdfjs-dist/web/pdf_viewer.mjs'
import 'pdfjs-dist/web/pdf_viewer.css'
import browser from '@/features/source-editor/extensions/browser'
const DEFAULT_RANGE_CHUNK_SIZE = 128 * 1024 // 128K chunks
export default class PDFJSWrapper {
public readonly viewer: PDFViewer
public readonly eventBus: EventBus
private readonly linkService: PDFLinkService
private readonly pdfCachingTransportFactory: any
private url?: string
// eslint-disable-next-line no-useless-constructor
constructor(public container: HTMLDivElement) {
// create the event bus
this.eventBus = new EventBus()
// create the link service
this.linkService = new PDFLinkService({
eventBus: this.eventBus,
externalLinkTarget: LinkTarget.BLANK,
externalLinkRel: 'noopener',
})
// create the viewer
this.viewer = new PDFViewer({
container: this.container,
eventBus: this.eventBus,
imageResourcesPath,
linkService: this.linkService,
maxCanvasPixels: browser.safari ? 4096 * 4096 : 8192 * 8192, // default is 4096 * 4096, increased for better resolution at high zoom levels (but not in Safari, which struggles with large canvases)
annotationMode: PDFJS.AnnotationMode.ENABLE, // enable annotations but not forms
annotationEditorMode: PDFJS.AnnotationEditorType.DISABLE, // disable annotation editing
})
this.linkService.setViewer(this.viewer)
this.pdfCachingTransportFactory = generatePdfCachingTransportFactory()
}
// load a document from a URL
async loadDocument({
url,
pdfFile,
abortController,
handleFetchError,
}: {
url: string
pdfFile: Record<string, any>
abortController: AbortController
handleFetchError: (error: Error) => void
}) {
this.url = url
const rangeTransport = this.pdfCachingTransportFactory({
url,
pdfFile,
abortController,
handleFetchError,
})
let rangeChunkSize = DEFAULT_RANGE_CHUNK_SIZE
if (rangeTransport && pdfFile.size < 2 * DEFAULT_RANGE_CHUNK_SIZE) {
// pdf.js disables the "bulk" download optimization when providing a
// custom range transport. Restore it by bumping the chunk size.
rangeChunkSize = pdfFile.size
}
try {
const doc = await loadPdfDocumentFromUrl(url, {
rangeChunkSize,
range: rangeTransport,
}).promise
// check that this is still the current URL
if (url !== this.url) {
return
}
this.viewer.setDocument(doc)
this.linkService.setDocument(doc)
return doc
} catch (error: any) {
if (!error || error.name !== 'MissingPDFException') {
captureException(error, {
tags: { handler: 'pdf-preview' },
})
}
throw error
}
}
async fetchAllData() {
await this.viewer.pdfDocument?.getData()
}
// update the current scale value if the container size changes
updateOnResize() {
if (!this.isVisible()) {
return
}
// Use requestAnimationFrame to prevent errors like "ResizeObserver loop
// completed with undelivered notifications" that can occur if updating the
// viewer causes another repaint. The cost of this is that the viewer update
// lags one frame behind, but it's unlikely to matter.
// Further reading: https://github.com/WICG/resize-observer/issues/38
window.requestAnimationFrame(() => {
const currentScaleValue = this.viewer.currentScaleValue
if (
currentScaleValue === 'auto' ||
currentScaleValue === 'page-fit' ||
currentScaleValue === 'page-height' ||
currentScaleValue === 'page-width'
) {
this.viewer.currentScaleValue = currentScaleValue
}
this.viewer.update()
})
}
// get the page and offset of a click event
clickPosition(event: MouseEvent, canvas: HTMLCanvasElement, page: number) {
if (!canvas) {
return
}
const { viewport } = this.viewer.getPageView(page)
const pageRect = canvas.getBoundingClientRect()
const dx = event.clientX - pageRect.left
const dy = event.clientY - pageRect.top
const [left, top] = viewport.convertToPdfPoint(dx, dy)
return {
page,
offset: {
left,
top: viewport.viewBox[3] - top,
},
}
}
// get the current page, offset and page size
get currentPosition() {
const pageIndex = this.viewer.currentPageNumber - 1
const pageView = this.viewer.getPageView(pageIndex)
const pageRect = pageView.div.getBoundingClientRect()
const containerRect = this.container.getBoundingClientRect()
const dy = containerRect.top - pageRect.top
const dx = containerRect.left - pageRect.left
const [left, top] = pageView.viewport.convertToPdfPoint(dx, dy)
const [, , width, height] = pageView.viewport.viewBox
return {
page: pageIndex,
offset: { top, left },
pageSize: { height, width },
}
}
scrollToPosition(position: Record<string, any>, scale = null) {
const destArray = [
null,
{
name: 'XYZ', // 'XYZ' = scroll to the given coordinates
},
position.offset.left,
position.offset.top,
scale,
]
this.viewer.scrollPageIntoView({
pageNumber: position.page + 1,
destArray,
})
// scroll the page left and down by an extra few pixels to account for the pdf.js viewer page border
const pageIndex = this.viewer.currentPageNumber - 1
const pageView = this.viewer.getPageView(pageIndex)
const offset = parseFloat(getComputedStyle(pageView.div).borderWidth)
this.viewer.container.scrollBy({
top: -offset,
left: -offset,
})
}
isVisible() {
return this.viewer.container.offsetParent !== null
}
}
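
A usage sketch; the container selector and pdfFile are assumptions, and 'pagesinit' is a standard pdf.js viewer event:

const container = document.querySelector('.pdf-viewer') as HTMLDivElement
const wrapper = new PDFJSWrapper(container)
wrapper.eventBus.on('pagesinit', () => {
  wrapper.viewer.currentScaleValue = 'page-width'
})
await wrapper.loadDocument({
  url: pdfFile.pdfUrl,
  pdfFile,
  abortController: new AbortController(),
  handleFetchError: err => console.warn(err),
})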

View File

@@ -0,0 +1,33 @@
import * as PDFJS from 'pdfjs-dist'
import type { DocumentInitParameters } from 'pdfjs-dist/types/src/display/api'
export { PDFJS }
PDFJS.GlobalWorkerOptions.workerPort = new Worker(
/* webpackChunkName: "pdf-worker" */
new URL('pdfjs-dist/build/pdf.worker.mjs', import.meta.url) // NOTE: .mjs extension
)
export const imageResourcesPath = '/images/pdfjs-dist/'
const cMapUrl = '/js/pdfjs-dist/cmaps/'
const standardFontDataUrl = '/fonts/pdfjs-dist/'
const params = new URLSearchParams(window.location.search)
const disableFontFace = params.get('disable-font-face') === 'true'
const disableStream = process.env.NODE_ENV !== 'test'
export const loadPdfDocumentFromUrl = (
url: string,
options: Partial<DocumentInitParameters> = {}
) =>
PDFJS.getDocument({
url,
cMapUrl,
standardFontDataUrl,
disableFontFace,
disableAutoFetch: true, // only fetch the data needed for the displayed pages
disableStream,
isEvalSupported: false,
enableXfa: false, // default is false (2021-10-12), but set explicitly to be sure
...options,
})
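
Callers can override any of the DocumentInitParameters; a minimal sketch (the URL is a placeholder):

const doc = await loadPdfDocumentFromUrl('/build/b1/output/output.pdf', {
  rangeChunkSize: 64 * 1024, // fetch in smaller chunks than the 128K default
}).promise
console.log('loaded', doc.numPages, 'pages')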

View File

@@ -0,0 +1,42 @@
import React from 'react'
import { CompileOutputFile } from '../../../../../types/compile'
export type LogEntry = {
raw: string
level: ErrorLevel
key: string
file?: string
column?: number
line?: number
ruleId?: string
message?: string
content?: string
type?: string
messageComponent?: React.ReactNode
contentDetails?: string[]
}
export type ErrorLevel =
| 'error'
| 'warning'
| 'info'
| 'typesetting'
| 'raw'
| 'success'
export type SourceLocation = {
file?: string
// `line` should be either a number or null (i.e. not required), but we
// currently sometimes get an empty string (from BibTeX errors).
line?: number | string | null
column?: number
}
export type PdfFileData = CompileOutputFile
type PdfFileArchiveData = CompileOutputFile & { fileCount: number }
export type PdfFileDataList = {
top: PdfFileData[]
other: PdfFileData[]
archive?: PdfFileArchiveData
}
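
For illustration, a LogEntry as handleLogFiles above would produce it (values made up):

const entry: LogEntry = {
  raw: '! Undefined control sequence.',
  level: 'error',
  key: 'compile-log-entry-1',
  file: 'main.tex',
  line: 12,
  message: 'Undefined control sequence.',
}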