first commit

commit ff9c54d5e4
2025-04-24 13:11:28 +08:00
5960 changed files with 834111 additions and 0 deletions

services/history-v1/.gitignore

@@ -0,0 +1,3 @@
# managed by monorepo$ bin/update_build_scripts
.npmrc


@@ -0,0 +1,3 @@
{
"require": "test/setup.js"
}


@@ -0,0 +1 @@
20.18.2


@@ -0,0 +1,32 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
FROM node:20.18.2 AS base
WORKDIR /overleaf/services/history-v1
COPY services/history-v1/install_deps.sh /overleaf/services/history-v1/
RUN chmod 0755 ./install_deps.sh && ./install_deps.sh
# Google Cloud Storage needs a writable $HOME/.config for resumable uploads
# (see https://googleapis.dev/nodejs/storage/latest/File.html#createWriteStream)
RUN mkdir /home/node/.config && chown node:node /home/node/.config
# fs persistor needs a writable folder as a target for the mounted volume
RUN mkdir /buckets && chown node:node /buckets
FROM base AS app
COPY package.json package-lock.json /overleaf/
COPY services/history-v1/package.json /overleaf/services/history-v1/
COPY libraries/ /overleaf/libraries/
COPY patches/ /overleaf/patches/
RUN cd /overleaf && npm ci --quiet
COPY services/history-v1/ /overleaf/services/history-v1/
FROM app
USER node
CMD ["node", "--expose-gc", "app.js"]


@@ -0,0 +1,156 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
BUILD_NUMBER ?= local
BRANCH_NAME ?= $(shell git rev-parse --abbrev-ref HEAD)
PROJECT_NAME = history-v1
BUILD_DIR_NAME = $(shell pwd | xargs basename | tr -cd '[a-zA-Z0-9_.\-]')
DOCKER_COMPOSE_FLAGS ?= -f docker-compose.yml
DOCKER_COMPOSE := BUILD_NUMBER=$(BUILD_NUMBER) \
BRANCH_NAME=$(BRANCH_NAME) \
PROJECT_NAME=$(PROJECT_NAME) \
MOCHA_GREP=${MOCHA_GREP} \
docker compose ${DOCKER_COMPOSE_FLAGS}
COMPOSE_PROJECT_NAME_TEST_ACCEPTANCE ?= test_acceptance_$(BUILD_DIR_NAME)
DOCKER_COMPOSE_TEST_ACCEPTANCE = \
COMPOSE_PROJECT_NAME=$(COMPOSE_PROJECT_NAME_TEST_ACCEPTANCE) $(DOCKER_COMPOSE)
COMPOSE_PROJECT_NAME_TEST_UNIT ?= test_unit_$(BUILD_DIR_NAME)
DOCKER_COMPOSE_TEST_UNIT = \
COMPOSE_PROJECT_NAME=$(COMPOSE_PROJECT_NAME_TEST_UNIT) $(DOCKER_COMPOSE)
clean:
-docker rmi ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
-docker rmi us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
-$(DOCKER_COMPOSE_TEST_UNIT) down --rmi local
-$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down --rmi local
HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.2 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.2 npm run --silent
SHELLCHECK_OPTS = \
--shell=bash \
--external-sources
SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
shellcheck:
@$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
shellcheck_fix:
@$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
elif [ -n "$$diff" ]; then echo "$$file"; \
else echo "\033[2m$$file\033[0m"; fi \
done
format:
$(RUN_LINTING) format
format_ci:
$(RUN_LINTING_CI) format
format_fix:
$(RUN_LINTING) format:fix
lint:
$(RUN_LINTING) lint
lint_ci:
$(RUN_LINTING_CI) lint
lint_fix:
$(RUN_LINTING) lint:fix
typecheck:
$(RUN_LINTING) types:check
typecheck_ci:
$(RUN_LINTING_CI) types:check
test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
$(DOCKER_COMPOSE_TEST_UNIT) run --rm test_unit
$(MAKE) test_unit_clean
endif
test_clean: test_unit_clean
test_unit_clean:
ifneq (,$(wildcard test/unit))
$(DOCKER_COMPOSE_TEST_UNIT) down -v -t 0
endif
test_acceptance: test_acceptance_clean test_acceptance_pre_run test_acceptance_run
$(MAKE) test_acceptance_clean
test_acceptance_debug: test_acceptance_clean test_acceptance_pre_run test_acceptance_run_debug
$(MAKE) test_acceptance_clean
test_acceptance_run:
ifneq (,$(wildcard test/acceptance))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance
endif
test_acceptance_run_debug:
ifneq (,$(wildcard test/acceptance))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run -p 127.0.0.9:19999:19999 --rm test_acceptance npm run test:acceptance -- --inspect=0.0.0.0:19999 --inspect-brk
endif
test_clean: test_acceptance_clean
test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
benchmarks:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance npm run benchmarks
build:
docker build \
--pull \
--build-arg BUILDKIT_INLINE_CACHE=1 \
--tag ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
--tag us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
--tag us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME) \
--cache-from us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME) \
--cache-from us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):main \
--file Dockerfile \
../..
tar:
$(DOCKER_COMPOSE) up tar
publish:
docker push $(DOCKER_REPO)/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
.PHONY: clean \
format format_fix \
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
benchmarks \
build tar publish \


@@ -0,0 +1,51 @@
## Database migrations
The history service uses knex to manage PostgreSQL migrations.
To create a new migration, run:
```
npx knex migrate:make migration_name
```
To apply migrations, run:
```
npx knex migrate:latest
```
For more information, consult the [knex migrations
guide](https://knexjs.org/guide/migrations.html#migration-cli).
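
A freshly generated migration file exports `up` and `down` functions. A minimal sketch (the table and column names here are illustrative only, not part of this service's schema):

```js
// migrations/20250424000000_add_example_table.js (illustrative filename)
exports.up = async function (knex) {
  await knex.schema.createTable('example', function (table) {
    table.increments('id').primary()
    table.timestamp('created_at').defaultTo(knex.fn.now())
  })
}

exports.down = async function (knex) {
  await knex.schema.dropTableIfExists('example')
}
```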
## Global blobs
Global blobs are blobs that are shared between projects. The list of global
blobs is stored in the projectHistoryGlobalBlobs Mongo collection and is read
when the service starts. Changing the list of global blobs needs to be done
carefully.
### Adding a blob to the global blobs list
If we identify a blob that appears in many projects, we might want to move that
blob to the global blobs list.
1. Add a record for the blob to the projectHistoryGlobalBlobs collection.
2. Restart the history service.
3. Delete any corresponding project blobs.
### Removing a blob from the global blobs list
Removing a blob from the global blobs list is trickier. As soon as the global
blob is made unavailable, every project that needs the blob will have to get
its own copy. To avoid disruptions, follow these steps:
1. In the projectHistoryGlobalBlobs collection, set the `demoted` property to
`true` on the global blob to remove. This will make the history system
write new instances of this blob to project blobs, but still read from the
global blob.
2. Restart the history service.
3. Copy the blob to all projects that need it.
4. Remove the blob from the projectHistoryGlobalBlobs collection.
5. Restart the history service.
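
For step 1, a hedged `mongosh` sketch (the exact document shape in `projectHistoryGlobalBlobs` is an assumption here; adjust the filter to however the blob is keyed):

```js
// mongosh sketch; the hash field name is an assumption
db.projectHistoryGlobalBlobs.updateOne(
  { hash: '<blob sha-1 hash>' },
  { $set: { demoted: true } }
)
```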


@@ -0,0 +1,149 @@
'use strict'
const basicAuth = require('basic-auth')
const config = require('config')
const HTTPStatus = require('http-status')
const jwt = require('jsonwebtoken')
const tsscmp = require('tsscmp')
function setupBasicHttpAuthForSwaggerDocs(app) {
app.use('/docs', function (req, res, next) {
if (hasValidBasicAuthCredentials(req)) {
return next()
}
res.header('WWW-Authenticate', 'Basic realm="Application"')
res.status(HTTPStatus.UNAUTHORIZED).end()
})
}
exports.setupBasicHttpAuthForSwaggerDocs = setupBasicHttpAuthForSwaggerDocs
function hasValidBasicAuthCredentials(req) {
const credentials = basicAuth(req)
if (!credentials) return false
// No security in the name, so just use straight comparison.
if (credentials.name !== 'staging') return false
const password = config.get('basicHttpAuth.password')
if (password && tsscmp(credentials.pass, password)) return true
// Support an old password so we can change the password without downtime.
if (config.has('basicHttpAuth.oldPassword')) {
const oldPassword = config.get('basicHttpAuth.oldPassword')
if (oldPassword && tsscmp(credentials.pass, oldPassword)) return true
}
return false
}
function setupSSL(app) {
const httpsOnly = config.get('httpsOnly') === 'true'
if (!httpsOnly) {
return
}
app.enable('trust proxy')
app.use(function (req, res, next) {
if (req.protocol === 'https') {
next()
return
}
if (req.method === 'GET' || req.method === 'HEAD') {
res.redirect('https://' + req.headers.host + req.url)
} else {
res
.status(HTTPStatus.FORBIDDEN)
.send('Please use HTTPS when submitting data to this server.')
}
})
}
exports.setupSSL = setupSSL
function handleJWTAuth(req, authOrSecDef, scopesOrApiKey, next) {
// as a temporary solution, to make the OT demo still work
// this handler will also check for basic authorization
if (hasValidBasicAuthCredentials(req)) {
return next()
}
let token, err
if (authOrSecDef.name === 'token') {
token = req.query.token
} else if (
req.headers.authorization &&
req.headers.authorization.split(' ')[0] === 'Bearer'
) {
token = req.headers.authorization.split(' ')[1]
}
if (!token) {
err = new Error('jwt missing')
err.statusCode = HTTPStatus.UNAUTHORIZED
err.headers = { 'WWW-Authenticate': 'Bearer' }
return next(err)
}
let decoded
try {
decoded = decodeJWT(token)
} catch (error) {
if (
error instanceof jwt.JsonWebTokenError ||
error instanceof jwt.TokenExpiredError
) {
err = new Error(error.message)
err.statusCode = HTTPStatus.UNAUTHORIZED
err.headers = { 'WWW-Authenticate': 'Bearer error="invalid_token"' }
return next(err)
}
throw error
}
if (decoded.project_id.toString() !== req.swagger.params.project_id.value) {
err = new Error('Wrong project_id')
err.statusCode = HTTPStatus.FORBIDDEN
return next(err)
}
next()
}
exports.hasValidBasicAuthCredentials = hasValidBasicAuthCredentials
/**
* Verify and decode the given JSON Web Token
*/
function decodeJWT(token) {
const key = config.get('jwtAuth.key')
const algorithm = config.get('jwtAuth.algorithm')
try {
return jwt.verify(token, key, { algorithms: [algorithm] })
} catch (err) {
// Support an old key so we can change the key without downtime.
if (config.has('jwtAuth.oldKey')) {
const oldKey = config.get('jwtAuth.oldKey')
return jwt.verify(token, oldKey, { algorithms: [algorithm] })
} else {
throw err
}
}
}
function handleBasicAuth(req, authOrSecDef, scopesOrApiKey, next) {
if (hasValidBasicAuthCredentials(req)) {
return next()
}
const error = new Error()
error.statusCode = HTTPStatus.UNAUTHORIZED
error.headers = { 'WWW-Authenticate': 'Basic realm="Application"' }
return next(error)
}
function getSwaggerHandlers() {
const handlers = {}
if (!config.has('jwtAuth.key') || !config.has('basicHttpAuth.password')) {
throw new Error('missing authentication env vars')
}
handlers.jwt = handleJWTAuth
handlers.basic = handleBasicAuth
handlers.token = handleJWTAuth
return handlers
}
exports.getSwaggerHandlers = getSwaggerHandlers
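
For reference, a token that handleJWTAuth above will accept can be minted with the same jsonwebtoken library. A minimal sketch, assuming the configured jwtAuth.key and jwtAuth.algorithm are available and that project_id is the only claim the handler checks:

const jwt = require('jsonwebtoken')
const config = require('config')

// Sketch: sign a token that decodeJWT() will verify for the given project.
function signProjectToken(projectId) {
  return jwt.sign({ project_id: projectId }, config.get('jwtAuth.key'), {
    algorithm: config.get('jwtAuth.algorithm'),
    expiresIn: '1h', // expiry chosen for illustration
  })
}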


@@ -0,0 +1,10 @@
/**
* Turn an async function into an Express middleware
*/
function expressify(fn) {
  return (req, res, next) => {
    fn(req, res, next).catch(next)
  }
}
module.exports = expressify
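
A minimal usage sketch: wrap an async route handler so that rejections are forwarded to the Express error middleware (the route and handler below are illustrative):

const express = require('express')
const expressify = require('./expressify')

const app = express()

// Any rejection inside the async handler is passed to next()
app.get(
  '/example',
  expressify(async (req, res) => {
    const payload = await Promise.resolve({ ok: true }) // placeholder async work
    res.json(payload)
  })
)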


@@ -0,0 +1,23 @@
const logger = require('@overleaf/logger')
const expressify = require('./expressify')
const { mongodb } = require('../../storage')
async function status(req, res) {
  try {
    await mongodb.db.command({ ping: 1 })
  } catch (err) {
    logger.warn({ err }, 'Lost connection with MongoDB')
    res.status(500).send('Lost connection with MongoDB')
    return
  }
  res.send('history-v1 is up')
}

function healthCheck(req, res) {
  res.send('OK')
}

module.exports = {
  status: expressify(status),
  healthCheck,
}


@@ -0,0 +1,141 @@
// @ts-check
'use strict'
const { expressify } = require('@overleaf/promise-utils')
const HTTPStatus = require('http-status')
const core = require('overleaf-editor-core')
const Change = core.Change
const Chunk = core.Chunk
const File = core.File
const FileMap = core.FileMap
const Snapshot = core.Snapshot
const TextOperation = core.TextOperation
const logger = require('@overleaf/logger')
const storage = require('../../storage')
const BatchBlobStore = storage.BatchBlobStore
const BlobStore = storage.BlobStore
const chunkStore = storage.chunkStore
const HashCheckBlobStore = storage.HashCheckBlobStore
const persistChanges = storage.persistChanges
const InvalidChangeError = storage.InvalidChangeError
const render = require('./render')
async function importSnapshot(req, res) {
const projectId = req.swagger.params.project_id.value
const rawSnapshot = req.swagger.params.snapshot.value
let snapshot
try {
snapshot = Snapshot.fromRaw(rawSnapshot)
} catch (err) {
return render.unprocessableEntity(res)
}
let historyId
try {
historyId = await chunkStore.initializeProject(projectId, snapshot)
} catch (err) {
if (err instanceof chunkStore.AlreadyInitialized) {
return render.conflict(res)
} else {
throw err
}
}
res.status(HTTPStatus.OK).json({ projectId: historyId })
}
async function importChanges(req, res, next) {
const projectId = req.swagger.params.project_id.value
const rawChanges = req.swagger.params.changes.value
const endVersion = req.swagger.params.end_version.value
const returnSnapshot = req.swagger.params.return_snapshot.value || 'none'
let changes
try {
changes = rawChanges.map(Change.fromRaw)
} catch (err) {
logger.warn({ err, projectId }, 'failed to parse changes')
return render.unprocessableEntity(res)
}
// Set limits to force us to persist all of the changes.
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
const limits = {
maxChanges: 0,
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
const hashCheckBlobStore = new HashCheckBlobStore(blobStore)
async function loadFiles() {
const blobHashes = new Set()
for (const change of changes) {
// This populates the set blobHashes with blobs referred to in the change
change.findBlobHashes(blobHashes)
}
await batchBlobStore.preload(Array.from(blobHashes))
for (const change of changes) {
await change.loadFiles('lazy', batchBlobStore)
}
}
async function buildResultSnapshot(resultChunk) {
const chunk = resultChunk || (await chunkStore.loadLatest(projectId))
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
const rawSnapshot = await snapshot.store(hashCheckBlobStore)
return rawSnapshot
}
await loadFiles()
let result
try {
result = await persistChanges(projectId, changes, limits, endVersion)
} catch (err) {
if (
err instanceof Chunk.ConflictingEndVersion ||
err instanceof TextOperation.UnprocessableError ||
err instanceof File.NotEditableError ||
err instanceof FileMap.PathnameError ||
err instanceof Snapshot.EditMissingFileError ||
err instanceof chunkStore.ChunkVersionConflictError ||
err instanceof InvalidChangeError
) {
// If we failed to apply operations, that's probably because they were
// invalid.
logger.warn({ err, projectId, endVersion }, 'changes rejected by history')
return render.unprocessableEntity(res)
} else if (err instanceof Chunk.NotFoundError) {
logger.warn({ err, projectId }, 'chunk not found')
return render.notFound(res)
} else {
throw err
}
}
if (returnSnapshot === 'none') {
res.status(HTTPStatus.CREATED).json({})
} else {
const rawSnapshot = await buildResultSnapshot(result && result.currentChunk)
res.status(HTTPStatus.CREATED).json(rawSnapshot)
}
}
exports.importSnapshot = expressify(importSnapshot)
exports.importChanges = expressify(importChanges)


@@ -0,0 +1,388 @@
'use strict'
const _ = require('lodash')
const Path = require('node:path')
const Stream = require('node:stream')
const HTTPStatus = require('http-status')
const fs = require('node:fs')
const { promisify } = require('node:util')
const config = require('config')
const OError = require('@overleaf/o-error')
const logger = require('@overleaf/logger')
const { Chunk, ChunkResponse, Blob } = require('overleaf-editor-core')
const {
BlobStore,
blobHash,
chunkStore,
HashCheckBlobStore,
ProjectArchive,
zipStore,
chunkBuffer,
} = require('../../storage')
const render = require('./render')
const expressify = require('./expressify')
const withTmpDir = require('./with_tmp_dir')
const StreamSizeLimit = require('./stream_size_limit')
const pipeline = promisify(Stream.pipeline)
async function initializeProject(req, res, next) {
let projectId = req.swagger.params.body.value.projectId
try {
projectId = await chunkStore.initializeProject(projectId)
res.status(HTTPStatus.OK).json({ projectId })
} catch (err) {
if (err instanceof chunkStore.AlreadyInitialized) {
render.conflict(res)
} else {
throw err
}
}
}
async function getLatestContent(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new BlobStore(projectId)
const chunk = await chunkBuffer.loadLatest(projectId)
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
await snapshot.loadFiles('eager', blobStore)
res.json(snapshot.toRaw())
}
async function getContentAtVersion(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
const blobStore = new BlobStore(projectId)
const snapshot = await getSnapshotAtVersion(projectId, version)
await snapshot.loadFiles('eager', blobStore)
res.json(snapshot.toRaw())
}
async function getLatestHashedContent(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new HashCheckBlobStore(new BlobStore(projectId))
const chunk = await chunkBuffer.loadLatest(projectId)
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
await snapshot.loadFiles('eager', blobStore)
const rawSnapshot = await snapshot.store(blobStore)
res.json(rawSnapshot)
}
async function getLatestHistory(req, res, next) {
const projectId = req.swagger.params.project_id.value
try {
const chunk = await chunkBuffer.loadLatest(projectId)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getLatestHistoryRaw(req, res, next) {
const projectId = req.swagger.params.project_id.value
const readOnly = req.swagger.params.readOnly.value
try {
const { startVersion, endVersion, endTimestamp } =
await chunkStore.loadLatestRaw(projectId, { readOnly })
res.json({
startVersion,
endVersion,
endTimestamp,
})
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getHistory(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
try {
const chunk = await chunkStore.loadAtVersion(projectId, version)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getHistoryBefore(req, res, next) {
const projectId = req.swagger.params.project_id.value
const timestamp = req.swagger.params.timestamp.value
try {
const chunk = await chunkStore.loadAtTimestamp(projectId, timestamp)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
/**
* Get all changes since the beginning of history or since a given version
*/
async function getChanges(req, res, next) {
const projectId = req.swagger.params.project_id.value
const since = req.swagger.params.since.value ?? 0
if (since < 0) {
// Negative values would cause an infinite loop
return res.status(400).json({
error: `Version out of bounds: ${since}`,
})
}
const changes = []
let chunk = await chunkBuffer.loadLatest(projectId)
if (since > chunk.getEndVersion()) {
return res.status(400).json({
error: `Version out of bounds: ${since}`,
})
}
// Fetch all chunks that come after the chunk that contains the start version
while (chunk.getStartVersion() > since) {
const changesInChunk = chunk.getChanges()
changes.unshift(...changesInChunk)
chunk = await chunkStore.loadAtVersion(projectId, chunk.getStartVersion())
}
// Extract the relevant changes from the chunk that contains the start version
const changesInChunk = chunk
.getChanges()
.slice(since - chunk.getStartVersion())
changes.unshift(...changesInChunk)
res.json(changes.map(change => change.toRaw()))
}
async function getZip(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
const blobStore = new BlobStore(projectId)
let snapshot
try {
snapshot = await getSnapshotAtVersion(projectId, version)
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
return render.notFound(res)
} else {
throw err
}
}
await withTmpDir('get-zip-', async tmpDir => {
const tmpFilename = Path.join(tmpDir, 'project.zip')
const archive = new ProjectArchive(snapshot)
await archive.writeZip(blobStore, tmpFilename)
res.set('Content-Type', 'application/octet-stream')
res.set('Content-Disposition', 'attachment; filename=project.zip')
const stream = fs.createReadStream(tmpFilename)
await pipeline(stream, res)
})
}
async function createZip(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
try {
const snapshot = await getSnapshotAtVersion(projectId, version)
const zipUrl = await zipStore.getSignedUrl(projectId, version)
// Do not await this; run it in the background.
zipStore.storeZip(projectId, version, snapshot).catch(err => {
logger.error({ err, projectId, version }, 'createZip: storeZip failed')
})
res.status(HTTPStatus.OK).json({ zipUrl })
} catch (error) {
if (error instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
next(error)
}
}
}
async function deleteProject(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new BlobStore(projectId)
await Promise.all([
chunkStore.deleteProjectChunks(projectId),
blobStore.deleteBlobs(),
])
res.status(HTTPStatus.NO_CONTENT).send()
}
async function createProjectBlob(req, res, next) {
const projectId = req.swagger.params.project_id.value
const expectedHash = req.swagger.params.hash.value
const maxUploadSize = parseInt(config.get('maxFileUploadSize'), 10)
await withTmpDir('blob-', async tmpDir => {
const tmpPath = Path.join(tmpDir, 'content')
const sizeLimit = new StreamSizeLimit(maxUploadSize)
await pipeline(req, sizeLimit, fs.createWriteStream(tmpPath))
if (sizeLimit.sizeLimitExceeded) {
return render.requestEntityTooLarge(res)
}
const hash = await blobHash.fromFile(tmpPath)
if (hash !== expectedHash) {
logger.debug({ hash, expectedHash }, 'Hash mismatch')
return render.conflict(res, 'File hash mismatch')
}
const blobStore = new BlobStore(projectId)
const newBlob = await blobStore.putFile(tmpPath)
try {
const { backupBlob } = await import('../../storage/lib/backupBlob.mjs')
await backupBlob(projectId, newBlob, tmpPath)
} catch (error) {
logger.warn({ error, projectId, hash }, 'Failed to backup blob')
}
res.status(HTTPStatus.CREATED).end()
})
}
async function headProjectBlob(req, res) {
const projectId = req.swagger.params.project_id.value
const hash = req.swagger.params.hash.value
const blobStore = new BlobStore(projectId)
const blob = await blobStore.getBlob(hash)
if (blob) {
res.set('Content-Length', blob.getByteLength())
res.status(200).end()
} else {
res.status(404).end()
}
}
// Support simple, singular ranges starting from zero only, up-to 2MB = 2_000_000, 7 digits
const RANGE_HEADER = /^bytes=0-(\d{1,7})$/
/**
* @param {string} header
* @return {{}|{start: number, end: number}}
* @private
*/
function _getRangeOpts(header) {
if (!header) return {}
const match = header.match(RANGE_HEADER)
if (match) {
const end = parseInt(match[1], 10)
return { start: 0, end }
}
return {}
}
async function getProjectBlob(req, res, next) {
const projectId = req.swagger.params.project_id.value
const hash = req.swagger.params.hash.value
const opts = _getRangeOpts(req.swagger.params.range.value || '')
const blobStore = new BlobStore(projectId)
logger.debug({ projectId, hash }, 'getProjectBlob started')
try {
let stream
try {
stream = await blobStore.getStream(hash, opts)
} catch (err) {
if (err instanceof Blob.NotFoundError) {
logger.warn({ projectId, hash }, 'Blob not found')
return res.status(404).end()
} else {
throw err
}
}
res.set('Content-Type', 'application/octet-stream')
try {
await pipeline(stream, res)
} catch (err) {
if (err?.code === 'ERR_STREAM_PREMATURE_CLOSE') {
res.end()
} else {
throw OError.tag(err, 'error transferring stream', { projectId, hash })
}
}
} finally {
logger.debug({ projectId, hash }, 'getProjectBlob finished')
}
}
async function copyProjectBlob(req, res, next) {
const sourceProjectId = req.swagger.params.copyFrom.value
const targetProjectId = req.swagger.params.project_id.value
const blobHash = req.swagger.params.hash.value
// Check that blob exists in source project
const sourceBlobStore = new BlobStore(sourceProjectId)
const targetBlobStore = new BlobStore(targetProjectId)
const [sourceBlob, targetBlob] = await Promise.all([
sourceBlobStore.getBlob(blobHash),
targetBlobStore.getBlob(blobHash),
])
if (!sourceBlob) {
return render.notFound(res)
}
// Exit early if the blob exists in the target project.
// This will also catch global blobs, which always exist.
if (targetBlob) {
return res.status(HTTPStatus.NO_CONTENT).end()
}
// Otherwise, copy blob from source project to target project
await sourceBlobStore.copyBlob(sourceBlob, targetProjectId)
res.status(HTTPStatus.CREATED).end()
}
async function getSnapshotAtVersion(projectId, version) {
const chunk = await chunkStore.loadAtVersion(projectId, version)
const snapshot = chunk.getSnapshot()
const changes = _.dropRight(
chunk.getChanges(),
chunk.getEndVersion() - version
)
snapshot.applyAll(changes)
return snapshot
}
module.exports = {
initializeProject: expressify(initializeProject),
getLatestContent: expressify(getLatestContent),
getContentAtVersion: expressify(getContentAtVersion),
getLatestHashedContent: expressify(getLatestHashedContent),
getLatestPersistedHistory: expressify(getLatestHistory),
getLatestHistory: expressify(getLatestHistory),
getLatestHistoryRaw: expressify(getLatestHistoryRaw),
getHistory: expressify(getHistory),
getHistoryBefore: expressify(getHistoryBefore),
getChanges: expressify(getChanges),
getZip: expressify(getZip),
createZip: expressify(createZip),
deleteProject: expressify(deleteProject),
createProjectBlob: expressify(createProjectBlob),
getProjectBlob: expressify(getProjectBlob),
headProjectBlob: expressify(headProjectBlob),
copyProjectBlob: expressify(copyProjectBlob),
}


@@ -0,0 +1,17 @@
'use strict'
const HTTPStatus = require('http-status')
function makeErrorRenderer(status) {
  return (res, message) => {
    res.status(status).json({ message: message || HTTPStatus[status] })
  }
}

module.exports = {
  badRequest: makeErrorRenderer(HTTPStatus.BAD_REQUEST),
  notFound: makeErrorRenderer(HTTPStatus.NOT_FOUND),
  unprocessableEntity: makeErrorRenderer(HTTPStatus.UNPROCESSABLE_ENTITY),
  conflict: makeErrorRenderer(HTTPStatus.CONFLICT),
  requestEntityTooLarge: makeErrorRenderer(HTTPStatus.REQUEST_ENTITY_TOO_LARGE),
}


@@ -0,0 +1,26 @@
const stream = require('node:stream')
/**
* Transform stream that stops passing bytes through after some threshold has
* been reached.
*/
class StreamSizeLimit extends stream.Transform {
  constructor(maxSize) {
    super()
    this.maxSize = maxSize
    this.accumulatedSize = 0
    this.sizeLimitExceeded = false
  }

  _transform(chunk, encoding, cb) {
    this.accumulatedSize += chunk.length
    if (this.accumulatedSize > this.maxSize) {
      this.sizeLimitExceeded = true
    } else {
      this.push(chunk)
    }
    cb()
  }
}
module.exports = StreamSizeLimit
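
A minimal sketch of the intended use (mirroring createProjectBlob in the projects controller): pipe an incoming stream through the limiter into a file, then check the flag. The helper name and paths are illustrative:

const fs = require('node:fs')
const { pipeline } = require('node:stream/promises')
const StreamSizeLimit = require('./stream_size_limit')

// Returns true if the whole stream fit within maxSize.
async function writeWithLimit(sourceStream, tmpPath, maxSize) {
  const sizeLimit = new StreamSizeLimit(maxSize)
  await pipeline(sourceStream, sizeLimit, fs.createWriteStream(tmpPath))
  return !sizeLimit.sizeLimitExceeded
}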


@@ -0,0 +1,27 @@
const fs = require('node:fs')
const fsExtra = require('fs-extra')
const logger = require('@overleaf/logger')
const os = require('node:os')
const path = require('node:path')
/**
* Create a temporary directory before executing a function and cleaning up
* after.
*
* @param {string} prefix - prefix for the temporary directory name
* @param {Function} fn - async function to call
*/
async function withTmpDir(prefix, fn) {
  const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), prefix))
  try {
    await fn(tmpDir)
  } finally {
    fsExtra.remove(tmpDir).catch(err => {
      if (err.code !== 'ENOENT') {
        logger.error({ err }, 'failed to delete temporary file')
      }
    })
  }
}
module.exports = withTmpDir
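
A short usage sketch: the callback receives the temporary directory path, and the directory is removed once the callback settles (the file name below is illustrative):

const fs = require('node:fs')
const path = require('node:path')
const withTmpDir = require('./with_tmp_dir')

async function demo() {
  await withTmpDir('example-', async tmpDir => {
    const scratchFile = path.join(tmpDir, 'scratch.txt')
    await fs.promises.writeFile(scratchFile, 'temporary data')
    // tmpDir is cleaned up after this callback resolves or rejects
  })
}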


@@ -0,0 +1,269 @@
'use strict'
const _ = require('lodash')
const paths = _.reduce(
[require('./projects').paths, require('./project_import').paths],
_.extend
)
const securityDefinitions = require('./security_definitions')
module.exports = {
swagger: '2.0',
info: {
title: 'Overleaf Editor API',
description: 'API for the Overleaf editor.',
version: '1.0',
},
produces: ['application/json'],
basePath: '/api',
paths,
securityDefinitions,
security: [
{
jwt: [],
},
],
definitions: {
Project: {
properties: {
projectId: {
type: 'string',
},
},
required: ['projectId'],
},
File: {
properties: {
hash: {
type: 'string',
},
byteLength: {
type: 'integer',
},
stringLength: {
type: 'integer',
},
},
},
Label: {
properties: {
authorId: {
type: 'integer',
},
text: {
type: 'string',
},
timestamp: {
type: 'string',
},
version: {
type: 'integer',
},
},
},
Chunk: {
properties: {
history: {
$ref: '#/definitions/History',
},
startVersion: {
type: 'number',
},
},
},
ChunkResponse: {
properties: {
chunk: {
$ref: '#/definitions/Chunk',
},
authors: {
type: 'array',
items: {
$ref: '#/definitions/Author',
},
},
},
},
ChunkResponseRaw: {
properties: {
startVersion: {
type: 'number',
},
endVersion: {
type: 'number',
},
endTimestamp: {
type: 'string',
},
},
},
History: {
properties: {
snapshot: {
$ref: '#/definitions/Snapshot',
},
changes: {
type: 'array',
items: {
$ref: '#/definitions/Change',
},
},
},
},
Snapshot: {
properties: {
files: {
type: 'object',
additionalProperties: {
$ref: '#/definitions/File',
},
},
},
required: ['files'],
},
Change: {
properties: {
timestamp: {
type: 'string',
},
operations: {
type: 'array',
items: {
$ref: '#/definitions/Operation',
},
},
authors: {
type: 'array',
items: {
type: ['integer', 'null'],
},
},
v2Authors: {
type: 'array',
items: {
type: ['string', 'null'],
},
},
projectVersion: {
type: 'string',
},
v2DocVersions: {
type: 'object',
additionalProperties: {
$ref: '#/definitions/V2DocVersions',
},
},
},
required: ['timestamp', 'operations'],
},
V2DocVersions: {
properties: {
pathname: {
type: 'string',
},
v: {
type: 'integer',
},
},
},
ChangeRequest: {
properties: {
baseVersion: {
type: 'integer',
},
untransformable: {
type: 'boolean',
},
operations: {
type: 'array',
items: {
$ref: '#/definitions/Operation',
},
},
authors: {
type: 'array',
items: {
type: ['integer', 'null'],
},
},
},
required: ['baseVersion', 'operations'],
},
ChangeNote: {
properties: {
baseVersion: {
type: 'integer',
},
change: {
$ref: '#/definitions/Change',
},
},
required: ['baseVersion'],
},
Operation: {
properties: {
pathname: {
type: 'string',
},
newPathname: {
type: 'string',
},
blob: {
$ref: '#/definitions/Blob',
},
textOperation: {
type: 'array',
items: {},
},
file: {
$ref: '#/definitions/File',
},
},
},
Error: {
properties: {
message: {
type: 'string',
},
},
required: ['message'],
},
Blob: {
properties: {
hash: {
type: 'string',
},
},
required: ['hash'],
},
Author: {
properties: {
id: {
type: 'integer',
},
email: {
type: 'string',
},
name: {
type: 'string',
},
},
required: ['id', 'email', 'name'],
},
SyncState: {
properties: {
synced: {
type: 'boolean',
},
},
},
ZipInfo: {
properties: {
zipUrl: {
type: 'string',
},
},
required: ['zipUrl'],
},
},
}


@@ -0,0 +1,147 @@
'use strict'
const importSnapshot = {
'x-swagger-router-controller': 'project_import',
operationId: 'importSnapshot',
tags: ['ProjectImport'],
description: 'Import a snapshot from the current rails app.',
consumes: ['application/json'],
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'snapshot',
in: 'body',
description: 'Snapshot to import.',
required: true,
schema: {
$ref: '#/definitions/Snapshot',
},
},
],
responses: {
200: {
description: 'Imported',
},
409: {
description: 'Conflict: project already initialized',
},
404: {
description: 'No such project exists',
},
},
security: [
{
basic: [],
},
],
}
const importChanges = {
'x-swagger-router-controller': 'project_import',
operationId: 'importChanges',
tags: ['ProjectImport'],
description: 'Import changes for a project from the current rails app.',
consumes: ['application/json'],
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'end_version',
description: 'end_version of latest persisted chunk',
in: 'query',
required: true,
type: 'number',
},
{
name: 'return_snapshot',
description:
'optionally, return a snapshot with the latest hashed content',
in: 'query',
required: false,
type: 'string',
enum: ['hashed', 'none'],
},
{
name: 'changes',
in: 'body',
description: 'changes to be imported',
required: true,
schema: {
type: 'array',
items: {
$ref: '#/definitions/Change',
},
},
},
],
responses: {
201: {
description: 'Created',
schema: {
$ref: '#/definitions/Snapshot',
},
},
},
security: [
{
basic: [],
},
],
}
const getChanges = {
'x-swagger-router-controller': 'projects',
operationId: 'getChanges',
tags: ['Project'],
description: 'Get changes applied to a project',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'since',
in: 'query',
description: 'start version',
required: false,
type: 'number',
},
],
responses: {
200: {
description: 'Success',
schema: {
type: 'array',
items: {
$ref: '#/definitions/Change',
},
},
},
},
security: [
{
basic: [],
},
],
}
exports.paths = {
'/projects/{project_id}/import': { post: importSnapshot },
'/projects/{project_id}/legacy_import': { post: importSnapshot },
'/projects/{project_id}/changes': { get: getChanges, post: importChanges },
'/projects/{project_id}/legacy_changes': { post: importChanges },
}


@@ -0,0 +1,588 @@
'use strict'
const Blob = require('overleaf-editor-core').Blob
exports.paths = {
'/projects': {
post: {
'x-swagger-router-controller': 'projects',
operationId: 'initializeProject',
tags: ['Project'],
description: 'Initialize project.',
consumes: ['application/json'],
parameters: [
{
name: 'body',
in: 'body',
schema: {
type: 'object',
properties: {
projectId: { type: 'string' },
},
},
},
],
responses: {
200: {
description: 'Initialized',
schema: {
$ref: '#/definitions/Project',
},
},
},
security: [
{
basic: [],
},
],
},
},
'/projects/{project_id}': {
delete: {
'x-swagger-router-controller': 'projects',
operationId: 'deleteProject',
tags: ['Project'],
description: "Delete a project's history",
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
204: {
description: 'Success',
},
},
security: [
{
basic: [],
},
],
},
},
'/projects/{project_id}/blobs/{hash}': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getProjectBlob',
tags: ['Project'],
description: 'Fetch blob content by its project id and hash.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'hash',
in: 'path',
description: 'Hexadecimal SHA-1 hash',
required: true,
type: 'string',
pattern: Blob.HEX_HASH_RX_STRING,
},
{
name: 'range',
in: 'header',
description: 'HTTP Range header',
required: false,
type: 'string',
},
],
produces: ['application/octet-stream'],
responses: {
200: {
description: 'Success',
schema: {
type: 'file',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
security: [{ jwt: [] }, { token: [] }],
},
head: {
'x-swagger-router-controller': 'projects',
operationId: 'headProjectBlob',
tags: ['Project'],
description: 'Fetch blob content-length by its project id and hash.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'hash',
in: 'path',
description: 'Hexadecimal SHA-1 hash',
required: true,
type: 'string',
pattern: Blob.HEX_HASH_RX_STRING,
},
],
produces: ['application/octet-stream'],
responses: {
200: {
description: 'Success',
schema: {
type: 'file',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
security: [{ jwt: [] }, { token: [] }],
},
put: {
'x-swagger-router-controller': 'projects',
operationId: 'createProjectBlob',
tags: ['Project'],
description:
'Create blob to be used in a file addition operation when importing a' +
' snapshot or changes',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'hash',
in: 'path',
description: 'Hexadecimal SHA-1 hash',
required: true,
type: 'string',
pattern: Blob.HEX_HASH_RX_STRING,
},
],
responses: {
201: {
description: 'Created',
},
},
},
post: {
'x-swagger-router-controller': 'projects',
operationId: 'copyProjectBlob',
tags: ['Project'],
description:
'Copies a blob from a source project to a target project when duplicating a project',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'target project id',
required: true,
type: 'string',
},
{
name: 'hash',
in: 'path',
description: 'Hexadecimal SHA-1 hash',
required: true,
type: 'string',
pattern: Blob.HEX_HASH_RX_STRING,
},
{
name: 'copyFrom',
in: 'query',
description: 'source project id',
required: true,
type: 'string',
},
],
responses: {
201: {
description: 'Created',
},
},
},
},
'/projects/{project_id}/latest/content': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestContent',
tags: ['Project'],
description:
'Get full content of the latest version. Text file ' +
'content is included, but binary files are just linked by hash.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/Snapshot',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/latest/hashed_content': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestHashedContent',
tags: ['Project'],
description:
'Get a snapshot of a project at the latest version ' +
'with the hashes for the contents of each file',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/Snapshot',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
security: [
{
basic: [],
},
],
},
},
'/projects/{project_id}/latest/history': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestHistory',
tags: ['Project'],
description:
'Get the latest sequence of changes.' +
' TODO probably want a configurable depth.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponse',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/latest/history/raw': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestHistoryRaw',
tags: ['Project'],
description: 'Get the metadata of latest sequence of changes.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'readOnly',
in: 'query',
description: 'use read only database connection',
required: false,
type: 'boolean',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponseRaw',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/latest/persistedHistory': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getLatestPersistedHistory',
tags: ['Project'],
description: 'Get the latest sequence of changes.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponse',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/versions/{version}/history': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getHistory',
tags: ['Project'],
description:
'Get the sequence of changes that includes the given version.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'version',
in: 'path',
description: 'numeric version',
required: true,
type: 'number',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponse',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/versions/{version}/content': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getContentAtVersion',
tags: ['Project'],
description: 'Get full content at the given version',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'version',
in: 'path',
description: 'numeric version',
required: true,
type: 'number',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/Snapshot',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/timestamp/{timestamp}/history': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getHistoryBefore',
tags: ['Project'],
description:
'Get the sequence of changes before the given timestamp.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'timestamp',
in: 'path',
description: 'timestamp',
required: true,
type: 'string',
format: 'date-time',
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ChunkResponse',
},
},
404: {
description: 'Not Found',
schema: {
$ref: '#/definitions/Error',
},
},
},
},
},
'/projects/{project_id}/version/{version}/zip': {
get: {
'x-swagger-router-controller': 'projects',
operationId: 'getZip',
tags: ['Project'],
description: 'Download zip with project content',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'version',
in: 'path',
description: 'numeric version',
required: true,
type: 'number',
},
],
produces: ['application/octet-stream'],
responses: {
200: {
description: 'success',
},
404: {
description: 'not found',
},
},
security: [
{
token: [],
},
],
},
post: {
'x-swagger-router-controller': 'projects',
operationId: 'createZip',
tags: ['Project'],
description:
'Create a zip file with project content. Returns a link to be polled.',
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'version',
in: 'path',
description: 'numeric version',
required: true,
type: 'number',
},
],
responses: {
200: {
description: 'success',
schema: {
$ref: '#/definitions/ZipInfo',
},
},
404: {
description: 'not found',
},
},
security: [
{
basic: [],
},
],
},
},
}


@@ -0,0 +1,17 @@
'use strict'
module.exports = {
  jwt: {
    type: 'apiKey',
    in: 'header',
    name: 'authorization',
  },
  basic: {
    type: 'basic',
  },
  token: {
    type: 'apiKey',
    in: 'query',
    name: 'token',
  },
}

services/history-v1/app.js

@@ -0,0 +1,172 @@
'use strict'
/* eslint-disable no-console */
// Metrics must be initialized before importing anything else
require('@overleaf/metrics/initialize')
const config = require('config')
const Events = require('node:events')
const BPromise = require('bluebird')
const express = require('express')
const helmet = require('helmet')
const HTTPStatus = require('http-status')
const logger = require('@overleaf/logger')
const Metrics = require('@overleaf/metrics')
const bodyParser = require('body-parser')
const swaggerTools = require('swagger-tools')
const swaggerDoc = require('./api/swagger')
const security = require('./api/app/security')
const healthChecks = require('./api/controllers/health_checks')
const { mongodb, loadGlobalBlobs } = require('./storage')
const path = require('node:path')
Events.setMaxListeners(20)
const app = express()
module.exports = app
logger.initialize('history-v1')
Metrics.open_sockets.monitor()
Metrics.injectMetricsRoute(app)
app.use(Metrics.http.monitor(logger))
Metrics.leaked_sockets.monitor(logger)
// We may have fairly large JSON bodies when receiving large Changes. Clients
// may have to handle 413 status codes and try creating files instead of sending
// text content in changes.
app.use(bodyParser.json({ limit: '6MB' }))
app.use(
bodyParser.urlencoded({
extended: false,
})
)
security.setupSSL(app)
security.setupBasicHttpAuthForSwaggerDocs(app)
const HTTP_REQUEST_TIMEOUT = parseInt(config.get('httpRequestTimeout'), 10)
app.use(function (req, res, next) {
res.setTimeout(HTTP_REQUEST_TIMEOUT)
next()
})
app.get('/', function (req, res) {
res.send('')
})
app.get('/status', healthChecks.status)
app.get('/health_check', healthChecks.healthCheck)
function setupSwagger() {
return new BPromise(function (resolve) {
swaggerTools.initializeMiddleware(swaggerDoc, function (middleware) {
app.use(middleware.swaggerMetadata())
app.use(middleware.swaggerSecurity(security.getSwaggerHandlers()))
app.use(middleware.swaggerValidator())
app.use(
middleware.swaggerRouter({
controllers: path.join(__dirname, 'api/controllers'),
useStubs: app.get('env') === 'development',
})
)
app.use(middleware.swaggerUi())
resolve()
})
})
}
function setupErrorHandling() {
app.use(function (req, res, next) {
const err = new Error('Not Found')
err.status = HTTPStatus.NOT_FOUND
return next(err)
})
// Handle Swagger errors.
app.use(function (err, req, res, next) {
const projectId = req.swagger?.params?.project_id?.value
if (res.headersSent) {
return next(err)
}
if (err.code === 'SCHEMA_VALIDATION_FAILED') {
logger.error({ err, projectId }, err.message)
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json(err.results)
}
if (err.code === 'INVALID_TYPE' || err.code === 'PATTERN') {
logger.error({ err, projectId }, err.message)
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: 'invalid type: ' + err.paramName,
})
}
if (err.code === 'ENUM_MISMATCH') {
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: 'invalid enum value: ' + err.paramName,
})
}
if (err.code === 'REQUIRED') {
return res.status(HTTPStatus.UNPROCESSABLE_ENTITY).json({
message: err.message,
})
}
next(err)
})
app.use(function (err, req, res, next) {
const projectId = req.swagger?.params?.project_id?.value
logger.error({ err, projectId }, err.message)
if (res.headersSent) {
return next(err)
}
// Handle errors that specify a statusCode. Some come from our code. Some
// bubble up from AWS SDK, but they sometimes have the statusCode set to
// 200, notably some InternalErrors and TimeoutErrors, so we have to guard
// against that. We also check `status`, but `statusCode` is preferred.
const statusCode = err.statusCode || err.status
if (statusCode && statusCode >= 400 && statusCode < 600) {
res.status(statusCode)
} else {
res.status(HTTPStatus.INTERNAL_SERVER_ERROR)
}
const sendErrorToClient = app.get('env') === 'development'
res.json({
message: err.message,
error: sendErrorToClient ? err : {},
})
})
}
app.setup = async function appSetup() {
await mongodb.client.connect()
logger.info('Connected to MongoDB')
await loadGlobalBlobs()
logger.info('Global blobs loaded')
app.use(helmet())
await setupSwagger()
setupErrorHandling()
}
async function startApp() {
await app.setup()
const port = parseInt(process.env.PORT, 10) || 3100
app.listen(port, err => {
if (err) {
console.error(err)
process.exit(1)
}
Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
})
}
// Run this if we're called directly
if (!module.parent) {
startApp().catch(err => {
console.error(err)
process.exit(1)
})
}


@@ -0,0 +1,81 @@
// @ts-check
// Metrics must be initialized before importing anything else
import '@overleaf/metrics/initialize.js'
import http from 'node:http'
import { fileURLToPath } from 'node:url'
import { promisify } from 'node:util'
import express from 'express'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import { hasValidBasicAuthCredentials } from './api/app/security.js'
import {
deleteProjectBackupCb,
healthCheck,
healthCheckCb,
NotReadyToDelete,
} from './storage/lib/backupDeletion.mjs'
import { mongodb } from './storage/index.js'
const app = express()
logger.initialize('history-v1-backup-deletion')
Metrics.open_sockets.monitor()
Metrics.injectMetricsRoute(app)
app.use(Metrics.http.monitor(logger))
Metrics.leaked_sockets.monitor(logger)
Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
function basicAuth(req, res, next) {
if (hasValidBasicAuthCredentials(req)) return next()
res.setHeader('WWW-Authenticate', 'Basic realm="Application"')
res.sendStatus(401)
}
app.delete('/project/:projectId/backup', basicAuth, (req, res, next) => {
deleteProjectBackupCb(req.params.projectId, err => {
if (err) {
return next(err)
}
res.sendStatus(204)
})
})
app.get('/status', (req, res) => {
res.send('history-v1-backup-deletion is up')
})
app.get('/health_check', (req, res, next) => {
healthCheckCb(err => {
if (err) return next(err)
res.sendStatus(200)
})
})
app.use((err, req, res, next) => {
req.logger.addFields({ err })
if (err instanceof NotReadyToDelete) {
req.logger.setLevel('warn')
return res.status(422).send(err.message)
}
req.logger.setLevel('error')
next(err)
})
/**
* @param {number} port
* @return {Promise<http.Server>}
*/
export async function startApp(port) {
await mongodb.client.connect()
await healthCheck()
const server = http.createServer(app)
await promisify(server.listen.bind(server, port))()
return server
}
// Run this if we're called directly
if (process.argv[1] === fileURLToPath(import.meta.url)) {
const PORT = parseInt(process.env.PORT || '3101', 10)
await startApp(PORT)
}


@@ -0,0 +1,117 @@
// @ts-check
// Metrics must be initialized before importing anything else
import '@overleaf/metrics/initialize.js'
import http from 'node:http'
import { fileURLToPath } from 'node:url'
import { promisify } from 'node:util'
import { setTimeout } from 'node:timers/promises'
import express from 'express'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import { healthCheck } from './backupVerifier/healthCheck.mjs'
import {
BackupCorruptedError,
verifyBlob,
} from './storage/lib/backupVerifier.mjs'
import { mongodb } from './storage/index.js'
import { expressify } from '@overleaf/promise-utils'
import { Blob } from 'overleaf-editor-core'
import { loadGlobalBlobs } from './storage/lib/blob_store/index.js'
import { EventEmitter } from 'node:events'
import {
loopRandomProjects,
setWriteMetrics,
} from './backupVerifier/ProjectVerifier.mjs'
const app = express()
logger.initialize('history-v1-backup-verifier')
Metrics.open_sockets.monitor()
Metrics.injectMetricsRoute(app)
app.use(Metrics.http.monitor(logger))
Metrics.leaked_sockets.monitor(logger)
Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
app.get(
'/history/:historyId/blob/:hash/verify',
expressify(async (req, res) => {
const { historyId, hash } = req.params
try {
await verifyBlob(historyId, hash)
res.sendStatus(200)
} catch (err) {
logger.warn({ err, historyId, hash }, 'manual verify blob failed')
if (err instanceof Blob.NotFoundError) {
res.status(404).send(err.message)
} else if (err instanceof BackupCorruptedError) {
res.status(422).send(err.message)
} else {
throw err
}
}
})
)
app.get('/status', (req, res) => {
res.send('history-v1-backup-verifier is up')
})
app.get(
'/health_check',
expressify(async (req, res) => {
await healthCheck()
res.sendStatus(200)
})
)
app.use((err, req, res, next) => {
req.logger.addFields({ err })
req.logger.setLevel('error')
next(err)
})
const shutdownEmitter = new EventEmitter()
shutdownEmitter.once('shutdown', async code => {
logger.info({ code }, 'shutting down')
await mongodb.client.close()
await setTimeout(100)
process.exit(code)
})
process.on('SIGTERM', () => {
shutdownEmitter.emit('shutdown', 0)
})
process.on('SIGINT', () => {
shutdownEmitter.emit('shutdown', 0)
})
/**
* @param {number} port
* @param {boolean} enableVerificationLoop
* @return {Promise<http.Server>}
*/
export async function startApp(port, enableVerificationLoop = true) {
await mongodb.client.connect()
await loadGlobalBlobs()
await healthCheck()
const server = http.createServer(app)
await promisify(server.listen.bind(server, port))()
enableVerificationLoop && loopRandomProjects(shutdownEmitter)
return server
}
setWriteMetrics(true)
// Run this if we're called directly
if (process.argv[1] === fileURLToPath(import.meta.url)) {
const PORT = parseInt(process.env.PORT || '3102', 10)
try {
await startApp(PORT)
} catch (error) {
shutdownEmitter.emit('shutdown', 1)
logger.error({ error }, 'error starting app')
}
}


@@ -0,0 +1,70 @@
// @ts-check
// Metrics must be initialized before importing anything else
import '@overleaf/metrics/initialize.js'
import http from 'node:http'
import { fileURLToPath } from 'node:url'
import { promisify } from 'node:util'
import express from 'express'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import { expressify } from '@overleaf/promise-utils'
import { drainQueue, healthCheck } from './storage/scripts/backup_worker.mjs'
const app = express()
logger.initialize('history-v1-backup-worker')
Metrics.open_sockets.monitor()
Metrics.injectMetricsRoute(app)
app.use(Metrics.http.monitor(logger))
Metrics.leaked_sockets.monitor(logger)
Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
app.get('/status', (req, res) => {
res.send('history-v1-backup-worker is up')
})
app.get(
'/health_check',
expressify(async (req, res) => {
await healthCheck()
res.sendStatus(200)
})
)
app.use((err, req, res, next) => {
req.logger.addFields({ err })
req.logger.setLevel('error')
next(err)
})
async function triggerGracefulShutdown(server, signal) {
logger.info({ signal }, 'graceful shutdown: started shutdown sequence')
await drainQueue()
server.close(function () {
logger.info({ signal }, 'graceful shutdown: closed server')
setTimeout(() => {
process.exit(0)
}, 1000)
})
}
/**
* @param {number} port
* @return {Promise<http.Server>}
*/
export async function startApp(port) {
await healthCheck()
const server = http.createServer(app)
await promisify(server.listen.bind(server, port))()
const signals = ['SIGINT', 'SIGTERM']
signals.forEach(signal => {
process.on(signal, () => triggerGracefulShutdown(server, signal))
})
return server
}
// Run this if we're called directly
if (process.argv[1] === fileURLToPath(import.meta.url)) {
const PORT = parseInt(process.env.PORT || '3103', 10)
await startApp(PORT)
}


@@ -0,0 +1,33 @@
import Metrics from '@overleaf/metrics'
import { objectIdFromDate } from './utils.mjs'
import { db } from '../storage/lib/mongodb.js'
const projectsCollection = db.collection('projects')
/**
*
* @param {Date} beforeTime
* @return {Promise<void>}
*/
export async function measurePendingChangesBeforeTime(beforeTime) {
const pendingChangeCount = await projectsCollection.countDocuments({
'overleaf.backup.pendingChangeAt': {
$lt: beforeTime,
},
})
Metrics.gauge('backup_verification_pending_changes', pendingChangeCount)
}
/**
*
* @param {Date} graceTime
* @return {Promise<void>}
*/
export async function measureNeverBackedUpProjects(graceTime) {
const neverBackedUpCount = await projectsCollection.countDocuments({
'overleaf.backup.lastBackedUpVersion': null,
_id: { $lt: objectIdFromDate(graceTime) },
})
Metrics.gauge('backup_verification_never_backed_up', neverBackedUpCount)
}
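For illustration (import paths assumed), both gauges can be refreshed together from a periodic job using the RPO helpers defined in utils.mjs, mirroring how the health check later in this commit calls them with a factor of two:

import {
  measurePendingChangesBeforeTime,
  measureNeverBackedUpProjects,
} from './ProjectMetrics.mjs'
import { getEndDateForRPO } from './utils.mjs'

// Gauge projects with changes pending for more than twice the RPO, and projects
// created more than twice the RPO ago that were never backed up at all.
await measurePendingChangesBeforeTime(getEndDateForRPO(2))
await measureNeverBackedUpProjects(getEndDateForRPO(2))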

View File

@@ -0,0 +1,79 @@
// @ts-check
import { objectIdFromDate } from './utils.mjs'
import { db } from '../storage/lib/mongodb.js'
import config from 'config'
const projectsCollection = db.collection('projects')
const HAS_PROJECTS_WITHOUT_HISTORY =
config.get('hasProjectsWithoutHistory') === 'true'
/**
* @param {Date} start
* @param {Date} end
* @param {number} N
* @yields {string}
*/
export async function* getProjectsCreatedInDateRangeCursor(start, end, N) {
yield* getSampleProjectsCursor(N, [
{
$match: {
_id: {
$gt: objectIdFromDate(start),
$lte: objectIdFromDate(end),
},
},
},
])
}
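/**
 * @param {Date} start
 * @param {Date} end
 * @param {number} N
 * @yields {string}
 */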
export async function* getProjectsUpdatedInDateRangeCursor(start, end, N) {
yield* getSampleProjectsCursor(N, [
{
$match: {
'overleaf.history.updatedAt': {
$gt: start,
$lte: end,
},
},
},
])
}
/**
* @typedef {import('mongodb').Document} Document
*/
/**
*
* @generator
* @param {number} N
* @param {Array<Document>} preSampleAggregationStages
* @yields {string}
*/
export async function* getSampleProjectsCursor(
N,
preSampleAggregationStages = []
) {
const cursor = projectsCollection.aggregate([
...preSampleAggregationStages,
{ $sample: { size: N } },
{ $project: { 'overleaf.history.id': 1 } },
])
let validProjects = 0
let hasInvalidProject = false
for await (const project of cursor) {
if (HAS_PROJECTS_WITHOUT_HISTORY && !project.overleaf?.history?.id) {
hasInvalidProject = true
continue
}
validProjects++
yield project.overleaf.history.id.toString()
}
if (validProjects === 0 && hasInvalidProject) {
yield* getSampleProjectsCursor(N, preSampleAggregationStages)
}
}
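A consumption sketch (import path assumed): each cursor is an async generator yielding history ids as strings, so a caller simply drains it with for await:

import { getProjectsCreatedInDateRangeCursor } from './ProjectSampler.mjs'

const start = new Date('2025-01-01')
const end = new Date('2025-02-01')
// Sample up to 10 projects created in January 2025 and print their history ids.
for await (const historyId of getProjectsCreatedInDateRangeCursor(start, end, 10)) {
  console.log(historyId)
}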

View File

@@ -0,0 +1,320 @@
// @ts-check
import { verifyProjectWithErrorContext } from '../storage/lib/backupVerifier.mjs'
import { promiseMapSettledWithLimit } from '@overleaf/promise-utils'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import {
getSampleProjectsCursor,
getProjectsCreatedInDateRangeCursor,
getProjectsUpdatedInDateRangeCursor,
} from './ProjectSampler.mjs'
import OError from '@overleaf/o-error'
import { setTimeout } from 'node:timers/promises'
const MS_PER_30_DAYS = 30 * 24 * 60 * 60 * 1000
const failureCounter = new metrics.prom.Counter({
name: 'backup_project_verification_failed',
help: 'Number of projects that failed verification',
labelNames: ['name'],
})
const successCounter = new metrics.prom.Counter({
name: 'backup_project_verification_succeeded',
help: 'Number of projects that succeeded verification',
})
let WRITE_METRICS = false
/**
* @typedef {import('node:events').EventEmitter} EventEmitter
*/
/**
* Allows writing metrics to be enabled or disabled.
* @param {Boolean} writeMetrics
*/
export function setWriteMetrics(writeMetrics) {
WRITE_METRICS = writeMetrics
}
/**
*
* @param {Error|unknown} error
* @param {string} historyId
*/
function handleVerificationError(error, historyId) {
const name = error instanceof Error ? error.name : 'UnknownError'
logger.error({ historyId, error, name }, 'error verifying project backup')
WRITE_METRICS && failureCounter.inc({ name })
return name
}
/**
*
* @param {Date} startDate
* @param {Date} endDate
* @param {number} interval
* @returns {Array<VerificationJobSpecification>}
*/
function splitJobs(startDate, endDate, interval) {
/** @type {Array<VerificationJobSpecification>} */
const jobs = []
while (startDate < endDate) {
const nextStart = new Date(
Math.min(startDate.getTime() + interval, endDate.getTime())
)
jobs.push({ startDate, endDate: nextStart })
startDate = nextStart
}
return jobs
}
/**
*
* @param {AsyncGenerator<string>} historyIdCursor
* @param {EventEmitter} [eventEmitter]
* @param {number} [delay] - Allows a delay between each verification
* @return {Promise<{verified: number, total: number, errorTypes: *[], hasFailure: boolean}>}
*/
async function verifyProjectsFromCursor(
historyIdCursor,
eventEmitter,
delay = 0
) {
const errorTypes = []
let verified = 0
let total = 0
let receivedShutdownSignal = false
if (eventEmitter) {
eventEmitter.once('shutdown', () => {
receivedShutdownSignal = true
})
}
for await (const historyId of historyIdCursor) {
if (receivedShutdownSignal) {
break
}
total++
try {
await verifyProjectWithErrorContext(historyId)
logger.debug({ historyId }, 'verified project backup successfully')
WRITE_METRICS && successCounter.inc()
verified++
} catch (error) {
const errorType = handleVerificationError(error, historyId)
errorTypes.push(errorType)
}
if (delay > 0) {
await setTimeout(delay)
}
}
return {
verified,
total,
errorTypes,
hasFailure: errorTypes.length > 0,
}
}
/**
*
* @param {number} nProjectsToSample
* @param {EventEmitter} [signal]
* @param {number} [delay]
* @return {Promise<VerificationJobStatus>}
*/
export async function verifyRandomProjectSample(
nProjectsToSample,
signal,
delay = 0
) {
const historyIds = await getSampleProjectsCursor(nProjectsToSample)
return await verifyProjectsFromCursor(historyIds, signal, delay)
}
/**
 * Samples projects created in the specified date range and verifies them.
*
* @param {Date} startDate
* @param {Date} endDate
* @param {number} projectsPerRange
* @param {EventEmitter} [signal]
* @return {Promise<VerificationJobStatus>}
*/
async function verifyRange(startDate, endDate, projectsPerRange, signal) {
logger.info({ startDate, endDate }, 'verifying range')
const results = await verifyProjectsFromCursor(
getProjectsCreatedInDateRangeCursor(startDate, endDate, projectsPerRange),
signal
)
if (results.total === 0) {
logger.debug(
{ start: startDate, end: endDate },
'No projects found in range'
)
}
const jobStatus = {
...results,
startDate,
endDate,
}
logger.debug(
{ ...jobStatus, errorTypes: Array.from(new Set(jobStatus.errorTypes)) },
'Verified range'
)
return jobStatus
}
/**
* @typedef {Object} VerificationJobSpecification
* @property {Date} startDate
* @property {Date} endDate
*/
/**
* @typedef {import('./types.d.ts').VerificationJobStatus} VerificationJobStatus
*/
/**
* @typedef {Object} VerifyDateRangeOptions
* @property {Date} startDate
* @property {Date} endDate
* @property {number} [interval]
* @property {number} [projectsPerRange]
* @property {number} [concurrency]
* @property {EventEmitter} [signal]
*/
/**
*
* @param {VerifyDateRangeOptions} options
* @return {Promise<VerificationJobStatus>}
*/
export async function verifyProjectsCreatedInDateRange({
concurrency = 0,
projectsPerRange = 10,
startDate,
endDate,
interval = MS_PER_30_DAYS,
signal,
}) {
const jobs = splitJobs(startDate, endDate, interval)
if (jobs.length === 0) {
throw new OError('Time range could not be split into jobs', {
start: startDate,
end: endDate,
interval,
})
}
const settlements = await promiseMapSettledWithLimit(
concurrency,
jobs,
({ startDate, endDate }) =>
verifyRange(startDate, endDate, projectsPerRange, signal)
)
return settlements.reduce(
/**
*
* @param {VerificationJobStatus} acc
* @param settlement
* @return {VerificationJobStatus}
*/
(acc, settlement) => {
if (settlement.status !== 'rejected') {
if (settlement.value.hasFailure) {
acc.hasFailure = true
}
acc.total += settlement.value.total
acc.verified += settlement.value.verified
acc.errorTypes = acc.errorTypes.concat(settlement.value.errorTypes)
} else {
logger.error({ ...settlement.reason }, 'Error processing range')
}
return acc
},
/** @type {VerificationJobStatus} */
{
startDate,
endDate,
verified: 0,
total: 0,
hasFailure: false,
errorTypes: [],
}
)
}
/**
 * Verifies that the backups of projects that have recently gone out of RPO have been updated.
*
* @param {Date} startDate
* @param {Date} endDate
* @param {number} nProjects
* @param {EventEmitter} [signal]
* @return {Promise<VerificationJobStatus>}
*/
export async function verifyProjectsUpdatedInDateRange(
startDate,
endDate,
nProjects,
signal
) {
logger.debug(
{ startDate, endDate, nProjects },
'Sampling projects updated in date range'
)
const results = await verifyProjectsFromCursor(
getProjectsUpdatedInDateRangeCursor(startDate, endDate, nProjects),
signal
)
if (results.total === 0) {
logger.debug(
{ start: startDate, end: endDate },
'No projects updated recently'
)
}
const jobStatus = {
...results,
startDate,
endDate,
}
logger.debug(
{ ...jobStatus, errorTypes: Array.from(new Set(jobStatus.errorTypes)) },
'Verified recently updated projects'
)
return jobStatus
}
/**
*
* @param {EventEmitter} signal
* @return {void}
*/
export function loopRandomProjects(signal) {
let shutdown = false
signal.on('shutdown', function () {
shutdown = true
})
async function loop() {
do {
try {
const result = await verifyRandomProjectSample(100, signal, 2_000)
logger.debug({ result }, 'verified random project sample')
} catch (error) {
logger.error({ error }, 'error verifying random project sample')
}
// eslint-disable-next-line no-unmodified-loop-condition
} while (!shutdown)
}
loop()
}
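As an illustrative one-off run (import path and option values assumed), a date range can be verified by splitting it into 30-day jobs of 10 sampled projects each:

import { verifyProjectsCreatedInDateRange } from './backupVerifier.mjs' // path assumed

const status = await verifyProjectsCreatedInDateRange({
  startDate: new Date('2025-01-01'),
  endDate: new Date('2025-03-01'),
  projectsPerRange: 10,
  concurrency: 1,
})
console.log(
  `${status.verified}/${status.total} verified`,
  'error types:',
  status.errorTypes
)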

View File

@@ -0,0 +1,32 @@
import config from 'config'
import { verifyProjectWithErrorContext } from '../storage/lib/backupVerifier.mjs'
import {
measureNeverBackedUpProjects,
measurePendingChangesBeforeTime,
} from './ProjectMetrics.mjs'
import { getEndDateForRPO, RPO } from './utils.mjs'
/** @type {Array<string>} */
const HEALTH_CHECK_PROJECTS = JSON.parse(config.get('healthCheckProjects'))
export async function healthCheck() {
if (!Array.isArray(HEALTH_CHECK_PROJECTS)) {
throw new Error('expected healthCheckProjects to be an array')
}
if (HEALTH_CHECK_PROJECTS.length !== 2) {
throw new Error('expected 2 healthCheckProjects')
}
if (!HEALTH_CHECK_PROJECTS.some(id => id.length === 24)) {
throw new Error('expected mongo id in healthCheckProjects')
}
if (!HEALTH_CHECK_PROJECTS.some(id => id.length < 24)) {
throw new Error('expected postgres id in healthCheckProjects')
}
for (const historyId of HEALTH_CHECK_PROJECTS) {
await verifyProjectWithErrorContext(historyId)
}
await measurePendingChangesBeforeTime(getEndDateForRPO(2))
await measureNeverBackedUpProjects(getEndDateForRPO(2))
}
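For reference, the test configuration later in this commit sets healthCheckProjects to a JSON-encoded pair that satisfies these checks: one short Postgres-style id and one 24-character Mongo-style id.

// From the test config later in this commit:
//   "healthCheckProjects": "[\"42\",\"000000000000000000000042\"]"
// JSON.parse yields ['42', '000000000000000000000042']: a Postgres id plus a Mongo id.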

View File

@@ -0,0 +1,8 @@
export type VerificationJobStatus = {
verified: number
total: number
startDate?: Date
endDate?: Date
hasFailure: boolean
errorTypes: Array<string>
}

View File

@@ -0,0 +1,35 @@
import { ObjectId } from 'mongodb'
import config from 'config'
export const RPO = parseInt(config.get('backupRPOInMS'), 10)
/**
* @param {Date} time
* @return {ObjectId}
*/
export function objectIdFromDate(time) {
return ObjectId.createFromTime(time.getTime() / 1000)
}
/**
* @param {number} [factor] - Multiply RPO by this factor, default is 1
* @return {Date}
*/
export function getEndDateForRPO(factor = 1) {
return new Date(Date.now() - RPO * factor)
}
/**
 * Creates a startDate, endDate pair covering a period of time that ends at the RPO horizon
*
* @param {number} offset - How many seconds we should check
* @return {{endDate: Date, startDate: Date}}
*/
export function getDatesBeforeRPO(offset) {
const now = new Date()
const endDate = new Date(now.getTime() - RPO)
return {
endDate,
startDate: new Date(endDate.getTime() - offset * 1000),
}
}
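A worked example (values assumed): with backupRPOInMS set to 3600000 — one hour, as in the default configuration later in this commit — and the clock at 12:00:00Z, getDatesBeforeRPO(300) returns the five-minute window that ends at the RPO horizon.

import { getDatesBeforeRPO, getEndDateForRPO, objectIdFromDate } from './utils.mjs'

// With RPO = 1 hour and now = 12:00:00Z:
//   endDate   = 11:00:00Z (one RPO ago)
//   startDate = 10:55:00Z (300 seconds earlier)
const { startDate, endDate } = getDatesBeforeRPO(300)

// ObjectIds encode a creation timestamp, so a date can become an _id boundary:
const boundary = objectIdFromDate(getEndDateForRPO())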

View File

@@ -0,0 +1,82 @@
const crypto = require('node:crypto')
const benny = require('benny')
const { Blob } = require('overleaf-editor-core')
const mongoBackend = require('../storage/lib/blob_store/mongo')
const postgresBackend = require('../storage/lib/blob_store/postgres')
const cleanup = require('../test/acceptance/js/storage/support/cleanup')
const MONGO_PROJECT_ID = '637386deb4ce3c62acd3848e'
const POSTGRES_PROJECT_ID = '123'
async function run() {
for (const blobCount of [1, 10, 100, 1000, 10000, 100000, 500000]) {
await cleanup.everything()
const blobs = createBlobs(blobCount)
await insertBlobs(blobs)
const randomHashes = getRandomHashes(blobs, 100)
await benny.suite(
`Read a blob in a project with ${blobCount} blobs`,
benny.add('Mongo backend', async () => {
await mongoBackend.findBlob(MONGO_PROJECT_ID, randomHashes[0])
}),
benny.add('Postgres backend', async () => {
await postgresBackend.findBlob(POSTGRES_PROJECT_ID, randomHashes[0])
}),
benny.cycle(),
benny.complete()
)
await benny.suite(
`Read 100 blobs in a project with ${blobCount} blobs`,
benny.add('Mongo backend', async () => {
await mongoBackend.findBlobs(MONGO_PROJECT_ID, randomHashes)
}),
benny.add('Postgres backend', async () => {
await postgresBackend.findBlobs(POSTGRES_PROJECT_ID, randomHashes)
}),
benny.cycle(),
benny.complete()
)
await benny.suite(
`Insert a blob in a project with ${blobCount} blobs`,
benny.add('Mongo backend', async () => {
const [newBlob] = createBlobs(1)
await mongoBackend.insertBlob(MONGO_PROJECT_ID, newBlob)
}),
benny.add('Postgres backend', async () => {
const [newBlob] = createBlobs(1)
await postgresBackend.insertBlob(POSTGRES_PROJECT_ID, newBlob)
}),
benny.cycle(),
benny.complete()
)
}
}
function createBlobs(blobCount) {
const blobs = []
for (let i = 0; i < blobCount; i++) {
const hash = crypto.randomBytes(20).toString('hex')
blobs.push(new Blob(hash, 42, 42))
}
return blobs
}
async function insertBlobs(blobs) {
for (const blob of blobs) {
await Promise.all([
mongoBackend.insertBlob(MONGO_PROJECT_ID, blob),
postgresBackend.insertBlob(POSTGRES_PROJECT_ID, blob),
])
}
}
function getRandomHashes(blobs, count) {
const hashes = []
for (let i = 0; i < count; i++) {
const index = Math.floor(Math.random() * blobs.length)
hashes.push(blobs[index].getHash())
}
return hashes
}
module.exports = run

View File

@@ -0,0 +1,17 @@
const testSetup = require('../test/setup')
const blobStoreSuite = require('./blob_store')
async function main() {
await testSetup.setupPostgresDatabase()
await testSetup.createGcsBuckets()
await blobStoreSuite()
}
main()
.then(() => {
process.exit(0)
})
.catch(err => {
console.error(err)
process.exit(1)
})

View File

@@ -0,0 +1,10 @@
history-v1
--dependencies=postgres,gcs,mongo,redis,s3
--docker-repos=us-east1-docker.pkg.dev/overleaf-ops/ol-docker
--env-add=
--env-pass-through=
--esmock-loader=False
--node-version=20.18.2
--public-repo=False
--script-version=4.7.0
--tsconfig-extra-includes=backup-deletion-app.mjs,backup-verifier-app.mjs,backup-worker-app.mjs,api/**/*,migrations/**/*,storage/**/*

View File

@@ -0,0 +1,104 @@
{
"databaseUrl": "HISTORY_CONNECTION_STRING",
"databaseUrlReadOnly": "HISTORY_FOLLOWER_CONNECTION_STRING",
"herokuDatabaseUrl": "DATABASE_URL",
"databasePoolMin": "DATABASE_POOL_MIN",
"databasePoolMax": "DATABASE_POOL_MAX",
"persistor": {
"backend": "PERSISTOR_BACKEND",
"s3": {
"key": "AWS_ACCESS_KEY_ID",
"secret": "AWS_SECRET_ACCESS_KEY",
"endpoint": "AWS_S3_ENDPOINT",
"pathStyle": "AWS_S3_PATH_STYLE",
"maxRetries": "S3_MAX_RETRIES",
"httpOptions": {
"timeout": "S3_TIMEOUT"
}
},
"gcs": {
"deletedBucketSuffix": "GCS_DELETED_BUCKET_SUFFIX",
"unlockBeforeDelete": "GCS_UNLOCK_BEFORE_DELETE",
"endpoint": {
"apiEndpoint": "GCS_API_ENDPOINT",
"projectId": "GCS_PROJECT_ID"
},
"retryOptions": {
"maxRetries": "GCS_MAX_RETRIES",
"idempotencyStrategy": "GCS_IDEMPOTENCY_STRATEGY"
}
},
"fallback": {
"backend": "PERSISTOR_FALLBACK_BACKEND",
"buckets": "PERSISTOR_BUCKET_MAPPING"
}
},
"backupPersistor": {
"keyEncryptionKeys": "BACKUP_KEY_ENCRYPTION_KEYS",
"s3SSEC": {
"key": "AWS_ACCESS_KEY_ID",
"secret": "AWS_SECRET_ACCESS_KEY",
"endpoint": "AWS_S3_ENDPOINT",
"pathStyle": "AWS_S3_PATH_STYLE",
"maxRetries": "BACKUP_S3_MAX_RETRIES",
"httpOptions": {
"timeout": "BACKUP_S3_TIMEOUT"
}
}
},
"blobStore": {
"globalBucket": "OVERLEAF_EDITOR_BLOBS_BUCKET",
"projectBucket": "OVERLEAF_EDITOR_PROJECT_BLOBS_BUCKET"
},
"chunkStore": {
"historyStoreConcurrency": "HISTORY_STORE_CONCURRENCY",
"bucket": "OVERLEAF_EDITOR_CHUNKS_BUCKET"
},
"zipStore": {
"bucket": "OVERLEAF_EDITOR_ZIPS_BUCKET",
"zipTimeoutMs": "ZIP_STORE_ZIP_TIMEOUT_MS"
},
"backupStore": {
"chunksBucket":"BACKUP_OVERLEAF_EDITOR_CHUNKS_BUCKET",
"deksBucket":"BACKUP_OVERLEAF_EDITOR_DEKS_BUCKET",
"globalBlobsBucket":"BACKUP_OVERLEAF_EDITOR_GLOBAL_BLOBS_BUCKET",
"projectBlobsBucket":"BACKUP_OVERLEAF_EDITOR_PROJECT_BLOBS_BUCKET"
},
"healthCheckBlobs": "HEALTH_CHECK_BLOBS",
"healthCheckProjects": "HEALTH_CHECK_PROJECTS",
"backupRPOInMS": "BACKUP_RPO_IN_MS",
"minSoftDeletionPeriodDays": "MIN_SOFT_DELETION_PERIOD_DAYS",
"mongo": {
"uri": "MONGO_CONNECTION_STRING"
},
"basicHttpAuth": {
"password": "STAGING_PASSWORD",
"oldPassword": "BASIC_HTTP_AUTH_OLD_PASSWORD"
},
"jwtAuth": {
"key": "OT_JWT_AUTH_KEY",
"oldKey": "OT_JWT_AUTH_OLD_KEY",
"algorithm": "OT_JWT_AUTH_ALG"
},
"clusterWorkers": "CLUSTER_WORKERS",
"maxFileUploadSize": "MAX_FILE_UPLOAD_SIZE",
"httpsOnly": "HTTPS_ONLY",
"httpRequestTimeout": "HTTP_REQUEST_TIMEOUT",
"redis": {
"queue": {
"host": "QUEUES_REDIS_HOST",
"password": "QUEUES_REDIS_PASSWORD",
"port": "QUEUES_REDIS_PORT"
},
"history": {
"host": "HISTORY_REDIS_HOST",
"password": "HISTORY_REDIS_PASSWORD",
"port": "HISTORY_REDIS_PORT"
},
"lock": {
"host": "REDIS_HOST",
"password": "REDIS_PASSWORD",
"port": "REDIS_PORT"
}
}
}

View File

@@ -0,0 +1,43 @@
{
"persistor": {
"backend": "s3",
"s3": {
"signedUrlExpiryInMs": "1800000",
"maxRetries": "1",
"httpOptions": {
"timeout": "8000"
}
},
"gcs": {
"signedUrlExpiryInMs": "1800000",
"deleteConcurrency": "50"
}
},
"backupPersistor": {
"backend": "s3SSEC",
"s3SSEC": {
"maxRetries": "1",
"pathStyle": false,
"httpOptions": {
"timeout": "120000"
}
}
},
"backupRPOInMS": "3600000",
"chunkStore": {
"historyStoreConcurrency": "4"
},
"zipStore": {
"zipTimeoutMs": "360000"
},
"hasProjectsWithoutHistory": false,
"minSoftDeletionPeriodDays": "90",
"maxDeleteKeys": "1000",
"useDeleteObjects": "true",
"clusterWorkers": "1",
"maxFileUploadSize": "52428800",
"databasePoolMin": "2",
"databasePoolMax": "10",
"httpsOnly": "false",
"httpRequestTimeout": "300000"
}

View File

@@ -0,0 +1,49 @@
{
"databaseUrl": "postgres://postgres:postgres@postgres/write_latex_dev",
"persistor": {
"s3": {
"endpoint": "http://s3:8080",
"pathStyle": "true"
},
"gcs": {
"unsignedUrls": "true",
"endpoint": {
"apiEndpoint": "http://fake-gcs:9090",
"projectId": "fake"
}
}
},
"blobStore": {
"globalBucket": "overleaf-development-blobs",
"projectBucket": "overleaf-development-project-blobs"
},
"chunkStore": {
"bucket": "overleaf-development-chunks"
},
"zipStore": {
"bucket": "overleaf-development-zips"
},
"backupStore": {
"chunksBucket":"overleaf-development-history-chunks",
"deksBucket":"overleaf-development-history-deks",
"globalBlobsBucket":"overleaf-development-history-global-blobs",
"projectBlobsBucket":"overleaf-development-history-project-blobs"
},
"backupPersistor": {
"keyEncryptionKeys": "[{\"key\":\"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=\",\"salt\":\"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=\"}]",
"s3SSEC": {
"ca": "[\"/certs/public.crt\"]"
}
},
"useDeleteObjects": "false",
"mongo": {
"uri": "mongodb://mongo:27017/sharelatex"
},
"basicHttpAuth": {
"password": "password"
},
"jwtAuth": {
"key": "secureKey",
"algorithm": "HS256"
}
}

View File

@@ -0,0 +1,5 @@
{
"backupPersistor": {
"tieringStorageClass": "INTELLIGENT_TIERING"
}
}

View File

@@ -0,0 +1,53 @@
{
"databaseUrl": "postgres://overleaf:overleaf@postgres/overleaf-history-v1-test",
"databaseUrlReadOnly": "postgres://read_only:password@postgres/overleaf-history-v1-test",
"persistor": {
"backend": "gcs",
"gcs": {
"unsignedUrls": "true",
"endpoint": {
"apiEndpoint": "http://gcs:9090",
"projectId": "fake"
}
}
},
"blobStore": {
"globalBucket": "overleaf-test-blobs",
"projectBucket": "overleaf-test-project-blobs"
},
"chunkStore": {
"bucket": "overleaf-test-chunks"
},
"zipStore": {
"bucket": "overleaf-test-zips"
},
"backupStore": {
"chunksBucket":"overleaf-test-history-chunks",
"deksBucket":"overleaf-test-history-deks",
"globalBlobsBucket":"overleaf-test-history-global-blobs",
"projectBlobsBucket":"overleaf-test-history-project-blobs"
},
"backupPersistor": {
"keyEncryptionKeys": "[{\"key\":\"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=\",\"salt\":\"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=\"}]",
"s3SSEC": {
"ca": "[\"/certs/public.crt\"]"
},
"tieringStorageClass": "REDUCED_REDUNDANCY"
},
"healthCheckBlobs": "[\"42/f70d7bba4ae1f07682e0358bd7a2068094fc023b\",\"000000000000000000000042/98d5521fe746bc2d11761edab5d0829bee286009\"]",
"healthCheckProjects": "[\"42\",\"000000000000000000000042\"]",
"backupRPOInMS": "360000",
"maxDeleteKeys": "3",
"useDeleteObjects": "false",
"mongo": {
"uri": "mongodb://mongo:27017/sharelatex"
},
"basicHttpAuth": {
"password": "test"
},
"jwtAuth": {
"key": "testtest",
"algorithm": "HS256"
},
"maxFileUploadSize": "524288"
}

View File

@@ -0,0 +1,237 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
version: "2.3"
services:
test_unit:
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
user: node
command: npm run test:unit:_run
environment:
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
test_acceptance:
build: .
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
HISTORY_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
AWS_S3_ENDPOINT: https://minio:9000
AWS_S3_PATH_STYLE: 'true'
AWS_ACCESS_KEY_ID: OVERLEAF_HISTORY_S3_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY: OVERLEAF_HISTORY_S3_SECRET_ACCESS_KEY
MINIO_ROOT_USER: MINIO_ROOT_USER
MINIO_ROOT_PASSWORD: MINIO_ROOT_PASSWORD
GCS_API_ENDPOINT: http://gcs:9090
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
volumes:
- ./test/acceptance/certs:/certs
depends_on:
mongo:
condition: service_started
redis:
condition: service_healthy
postgres:
condition: service_healthy
certs:
condition: service_completed_successfully
minio:
condition: service_started
minio_setup:
condition: service_completed_successfully
gcs:
condition: service_healthy
user: node
command: npm run test:acceptance
tar:
build: .
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
volumes:
- ./:/tmp/build/
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
redis:
image: redis
healthcheck:
test: ping="$$(redis-cli ping)" && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
mongo:
image: mongo:6.0.13
command: --replSet overleaf
volumes:
- ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
environment:
MONGO_INITDB_DATABASE: sharelatex
extra_hosts:
# Required when using the automatic database setup for initializing the
# replica set. This override is not needed when running the setup after
# starting up mongo.
- mongo:127.0.0.1
postgres:
image: postgres:10
environment:
POSTGRES_USER: overleaf
POSTGRES_PASSWORD: overleaf
POSTGRES_DB: overleaf-history-v1-test
volumes:
- ./test/acceptance/pg-init/:/docker-entrypoint-initdb.d/
healthcheck:
test: pg_isready --quiet
interval: 1s
retries: 20
certs:
image: node:20.18.2
volumes:
- ./test/acceptance/certs:/certs
working_dir: /certs
entrypoint: sh
command:
- '-cex'
- |
if [ ! -f ./certgen ]; then
wget -O ./certgen "https://github.com/minio/certgen/releases/download/v1.3.0/certgen-linux-$(dpkg --print-architecture)"
chmod +x ./certgen
fi
if [ ! -f private.key ] || [ ! -f public.crt ]; then
./certgen -host minio
fi
minio:
image: minio/minio:RELEASE.2024-10-13T13-34-11Z
command: server /data
volumes:
- ./test/acceptance/certs:/root/.minio/certs
environment:
MINIO_ROOT_USER: MINIO_ROOT_USER
MINIO_ROOT_PASSWORD: MINIO_ROOT_PASSWORD
depends_on:
certs:
condition: service_completed_successfully
minio_setup:
depends_on:
certs:
condition: service_completed_successfully
minio:
condition: service_started
image: minio/mc:RELEASE.2024-10-08T09-37-26Z
volumes:
- ./test/acceptance/certs:/root/.mc/certs/CAs
entrypoint: sh
command:
- '-cex'
- |
sleep 1
mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD \
|| sleep 3 && \
mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD \
|| sleep 3 && \
mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD \
|| sleep 3 && \
mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD
mc mb --ignore-existing s3/overleaf-test-history-chunks
mc mb --ignore-existing s3/overleaf-test-history-deks
mc mb --ignore-existing s3/overleaf-test-history-global-blobs
mc mb --ignore-existing s3/overleaf-test-history-project-blobs
mc admin user add s3 \
OVERLEAF_HISTORY_S3_ACCESS_KEY_ID \
OVERLEAF_HISTORY_S3_SECRET_ACCESS_KEY
echo '
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": "arn:aws:s3:::overleaf-test-history-chunks"
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::overleaf-test-history-chunks/*"
},
{
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": "arn:aws:s3:::overleaf-test-history-deks"
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::overleaf-test-history-deks/*"
},
{
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": "arn:aws:s3:::overleaf-test-history-global-blobs"
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::overleaf-test-history-global-blobs/*"
},
{
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": "arn:aws:s3:::overleaf-test-history-project-blobs"
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::overleaf-test-history-project-blobs/*"
}
]
}' > policy-history.json
mc admin policy create s3 overleaf-history policy-history.json
mc admin policy attach s3 overleaf-history \
--user=OVERLEAF_HISTORY_S3_ACCESS_KEY_ID
gcs:
image: fsouza/fake-gcs-server:1.45.2
command: ["--port=9090", "--scheme=http"]
healthcheck:
test: wget --quiet --output-document=/dev/null http://localhost:9090/storage/v1/b
interval: 1s
retries: 20

View File

@@ -0,0 +1,246 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
version: "2.3"
services:
test_unit:
build:
context: ../..
dockerfile: services/history-v1/Dockerfile
target: base
volumes:
- .:/overleaf/services/history-v1
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
working_dir: /overleaf/services/history-v1
environment:
MOCHA_GREP: ${MOCHA_GREP}
LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
build:
context: ../..
dockerfile: services/history-v1/Dockerfile
target: base
volumes:
- .:/overleaf/services/history-v1
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
- ./test/acceptance/certs:/certs
working_dir: /overleaf/services/history-v1
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
HISTORY_REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
AWS_S3_ENDPOINT: https://minio:9000
AWS_S3_PATH_STYLE: 'true'
AWS_ACCESS_KEY_ID: OVERLEAF_HISTORY_S3_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY: OVERLEAF_HISTORY_S3_SECRET_ACCESS_KEY
MINIO_ROOT_USER: MINIO_ROOT_USER
MINIO_ROOT_PASSWORD: MINIO_ROOT_PASSWORD
GCS_API_ENDPOINT: http://gcs:9090
GCS_PROJECT_ID: fake
STORAGE_EMULATOR_HOST: http://gcs:9090/storage/v1
MOCHA_GREP: ${MOCHA_GREP}
LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
condition: service_started
redis:
condition: service_healthy
postgres:
condition: service_healthy
certs:
condition: service_completed_successfully
minio:
condition: service_started
minio_setup:
condition: service_completed_successfully
gcs:
condition: service_healthy
command: npm run --silent test:acceptance
redis:
image: redis
healthcheck:
test: ping=$$(redis-cli ping) && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
mongo:
image: mongo:6.0.13
command: --replSet overleaf
volumes:
- ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
environment:
MONGO_INITDB_DATABASE: sharelatex
extra_hosts:
# Required when using the automatic database setup for initializing the
# replica set. This override is not needed when running the setup after
# starting up mongo.
- mongo:127.0.0.1
postgres:
image: postgres:10
environment:
POSTGRES_USER: overleaf
POSTGRES_PASSWORD: overleaf
POSTGRES_DB: overleaf-history-v1-test
volumes:
- ./test/acceptance/pg-init/:/docker-entrypoint-initdb.d/
healthcheck:
test: pg_isready --host=localhost --quiet
interval: 1s
retries: 20
certs:
image: node:20.18.2
volumes:
- ./test/acceptance/certs:/certs
working_dir: /certs
entrypoint: sh
command:
- '-cex'
- |
if [ ! -f ./certgen ]; then
wget -O ./certgen "https://github.com/minio/certgen/releases/download/v1.3.0/certgen-linux-$(dpkg --print-architecture)"
chmod +x ./certgen
fi
if [ ! -f private.key ] || [ ! -f public.crt ]; then
./certgen -host minio
fi
minio:
image: minio/minio:RELEASE.2024-10-13T13-34-11Z
command: server /data
volumes:
- ./test/acceptance/certs:/root/.minio/certs
environment:
MINIO_ROOT_USER: MINIO_ROOT_USER
MINIO_ROOT_PASSWORD: MINIO_ROOT_PASSWORD
depends_on:
certs:
condition: service_completed_successfully
minio_setup:
depends_on:
certs:
condition: service_completed_successfully
minio:
condition: service_started
image: minio/mc:RELEASE.2024-10-08T09-37-26Z
volumes:
- ./test/acceptance/certs:/root/.mc/certs/CAs
entrypoint: sh
command:
- '-cex'
- |
sleep 1
mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD \
|| sleep 3 && \
mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD \
|| sleep 3 && \
mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD \
|| sleep 3 && \
mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD
mc mb --ignore-existing s3/overleaf-test-history-chunks
mc mb --ignore-existing s3/overleaf-test-history-deks
mc mb --ignore-existing s3/overleaf-test-history-global-blobs
mc mb --ignore-existing s3/overleaf-test-history-project-blobs
mc admin user add s3 \
OVERLEAF_HISTORY_S3_ACCESS_KEY_ID \
OVERLEAF_HISTORY_S3_SECRET_ACCESS_KEY
echo '
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": "arn:aws:s3:::overleaf-test-history-chunks"
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::overleaf-test-history-chunks/*"
},
{
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": "arn:aws:s3:::overleaf-test-history-deks"
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::overleaf-test-history-deks/*"
},
{
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": "arn:aws:s3:::overleaf-test-history-global-blobs"
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::overleaf-test-history-global-blobs/*"
},
{
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": "arn:aws:s3:::overleaf-test-history-project-blobs"
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Resource": "arn:aws:s3:::overleaf-test-history-project-blobs/*"
}
]
}' > policy-history.json
mc admin policy create s3 overleaf-history policy-history.json
mc admin policy attach s3 overleaf-history \
--user=OVERLEAF_HISTORY_S3_ACCESS_KEY_ID
gcs:
image: fsouza/fake-gcs-server:1.45.2
command: ["--port=9090", "--scheme=http"]
healthcheck:
test: wget --quiet --output-document=/dev/null http://localhost:9090/storage/v1/b
interval: 1s
retries: 20

View File

@@ -0,0 +1,9 @@
#!/bin/sh
set -ex
apt-get update
apt-get install jq parallel --yes
rm -rf /var/lib/apt/lists/*

View File

@@ -0,0 +1,19 @@
const config = require('config')
const baseConfig = {
client: 'postgresql',
connection: config.herokuDatabaseUrl || config.databaseUrl,
pool: {
min: parseInt(config.databasePoolMin, 10),
max: parseInt(config.databasePoolMax, 10),
},
migrations: {
tableName: 'knex_migrations',
},
}
module.exports = {
development: baseConfig,
production: baseConfig,
test: baseConfig,
}

View File

@@ -0,0 +1,80 @@
/**
* This is the initial migration, meant to replicate the current state of the
* history database. If tables already exist, this migration is a noop.
*/
exports.up = async function (knex) {
await knex.raw(`
CREATE TABLE IF NOT EXISTS chunks (
id SERIAL,
doc_id integer NOT NULL,
end_version integer NOT NULL,
end_timestamp timestamp without time zone,
CONSTRAINT chunks_version_non_negative CHECK (end_version >= 0)
)
`)
await knex.raw(`
CREATE UNIQUE INDEX IF NOT EXISTS index_chunks_on_doc_id_and_end_version
ON chunks (doc_id, end_version)
`)
await knex.raw(`
CREATE TABLE IF NOT EXISTS old_chunks (
chunk_id integer NOT NULL PRIMARY KEY,
doc_id integer NOT NULL,
end_version integer,
end_timestamp timestamp without time zone,
deleted_at timestamp without time zone
)
`)
await knex.raw(`
CREATE INDEX IF NOT EXISTS index_old_chunks_on_doc_id_and_end_version
ON old_chunks (doc_id, end_version)
`)
await knex.raw(`
CREATE TABLE IF NOT EXISTS pending_chunks (
id SERIAL,
doc_id integer NOT NULL,
end_version integer NOT NULL,
end_timestamp timestamp without time zone,
CONSTRAINT chunks_version_non_negative CHECK (end_version >= 0)
)
`)
await knex.raw(`
CREATE INDEX IF NOT EXISTS index_pending_chunks_on_doc_id_and_id
ON pending_chunks (doc_id, id)
`)
await knex.raw(`
CREATE TABLE IF NOT EXISTS blobs (
hash_bytes bytea NOT NULL PRIMARY KEY,
byte_length integer NOT NULL,
string_length integer,
global boolean,
CONSTRAINT blobs_byte_length_non_negative CHECK (byte_length >= 0),
CONSTRAINT blobs_string_length_non_negative
CHECK (string_length IS NULL OR string_length >= 0)
)
`)
await knex.raw(`
CREATE TABLE IF NOT EXISTS project_blobs (
project_id integer NOT NULL,
hash_bytes bytea NOT NULL,
byte_length integer NOT NULL,
string_length integer,
PRIMARY KEY (project_id, hash_bytes),
CONSTRAINT project_blobs_byte_length_non_negative
CHECK (byte_length >= 0),
CONSTRAINT project_blobs_string_length_non_negative
CHECK (string_length IS NULL OR string_length >= 0)
)
`)
await knex.raw(`CREATE SEQUENCE IF NOT EXISTS docs_id_seq`)
}
exports.down = async function (knex) {
// Don't do anything on the down migration
}

View File

@@ -0,0 +1,23 @@
exports.up = async function (knex) {
await knex.raw(`
ALTER TABLE chunks ADD COLUMN start_version integer
`)
await knex.raw(`
ALTER TABLE pending_chunks ADD COLUMN start_version integer
`)
await knex.raw(`
ALTER TABLE old_chunks ADD COLUMN start_version integer
`)
}
exports.down = async function (knex) {
await knex.raw(`
ALTER TABLE chunks DROP COLUMN start_version
`)
await knex.raw(`
ALTER TABLE pending_chunks DROP COLUMN start_version
`)
await knex.raw(`
ALTER TABLE old_chunks DROP COLUMN start_version
`)
}

View File

@@ -0,0 +1,41 @@
exports.config = {
// CREATE INDEX CONCURRENTLY can't be run inside a transaction
// If this migration fails in the middle, indexes and constraints will have
// to be cleaned up manually.
transaction: false,
}
exports.up = async function (knex) {
await knex.raw(`
ALTER TABLE chunks
ADD CONSTRAINT chunks_start_version_non_negative
CHECK (start_version IS NOT NULL AND start_version >= 0)
NOT VALID
`)
await knex.raw(`
ALTER TABLE chunks
VALIDATE CONSTRAINT chunks_start_version_non_negative
`)
await knex.raw(`
CREATE UNIQUE INDEX CONCURRENTLY index_chunks_on_doc_id_and_start_version
ON chunks (doc_id, start_version)
`)
await knex.raw(`
ALTER TABLE chunks
ADD UNIQUE USING INDEX index_chunks_on_doc_id_and_start_version
`)
}
exports.down = async function (knex) {
await knex.raw(`
ALTER TABLE chunks
DROP CONSTRAINT IF EXISTS index_chunks_on_doc_id_and_start_version
`)
await knex.raw(`
DROP INDEX IF EXISTS index_chunks_on_doc_id_and_start_version
`)
await knex.raw(`
ALTER TABLE chunks
DROP CONSTRAINT IF EXISTS chunks_start_version_non_negative
`)
}

View File

@@ -0,0 +1,7 @@
exports.up = async function (knex) {
await knex.raw(`DROP TABLE IF EXISTS blobs`)
}
exports.down = function (knex) {
// Not reversible
}

View File

@@ -0,0 +1,27 @@
// @ts-check
/**
* @import { Knex } from "knex"
*/
/**
* @param { Knex } knex
* @returns { Promise<void> }
*/
exports.up = async function (knex) {
await knex.raw(`
ALTER TABLE chunks
ADD COLUMN closed BOOLEAN NOT NULL DEFAULT FALSE
`)
}
/**
* @param { Knex } knex
* @returns { Promise<void> }
*/
exports.down = async function (knex) {
await knex.raw(`
ALTER TABLE chunks
DROP COLUMN closed
`)
}

View File

@@ -0,0 +1,76 @@
{
"name": "overleaf-editor",
"version": "1.0.0",
"description": "Overleaf Editor.",
"author": "",
"license": "Proprietary",
"private": true,
"dependencies": {
"@google-cloud/secret-manager": "^5.6.0",
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/mongo-utils": "*",
"@overleaf/o-error": "*",
"@overleaf/object-persistor": "*",
"@overleaf/promise-utils": "*",
"@overleaf/redis-wrapper": "*",
"@overleaf/settings": "*",
"@overleaf/stream-utils": "^0.1.0",
"archiver": "^5.3.0",
"basic-auth": "^2.0.1",
"bluebird": "^3.7.2",
"body-parser": "^1.20.3",
"bull": "^4.16.5",
"bunyan": "^1.8.12",
"check-types": "^11.1.2",
"command-line-args": "^3.0.3",
"config": "^1.19.0",
"express": "^4.21.2",
"fs-extra": "^9.0.1",
"generic-pool": "^2.1.1",
"helmet": "^3.22.0",
"http-status": "^1.4.2",
"jsonwebtoken": "^9.0.0",
"knex": "^2.4.0",
"lodash": "^4.17.19",
"mongodb": "6.12.0",
"overleaf-editor-core": "*",
"p-limit": "^6.2.0",
"pg": "^8.7.1",
"pg-query-stream": "^4.2.4",
"swagger-tools": "^0.10.4",
"temp": "^0.8.3",
"throng": "^4.0.0",
"tsscmp": "^1.0.6",
"utf-8-validate": "^5.0.4"
},
"devDependencies": {
"benny": "^3.7.1",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"chai-exclude": "^2.1.1",
"mocha": "^11.1.0",
"node-fetch": "^2.7.0",
"sinon": "^9.0.2",
"swagger-client": "^3.10.0",
"typescript": "^5.0.4",
"yauzl": "^2.9.1"
},
"scripts": {
"start": "node app.js",
"lint": "eslint --max-warnings 0 --format unix .",
"lint:fix": "eslint --fix .",
"format": "prettier --list-different $PWD/'**/*.*js'",
"format:fix": "prettier --write $PWD/'**/*.*js'",
"test:unit": "npm run test:unit:_run -- --grep=$MOCHA_GREP",
"test:acceptance": "npm run test:acceptance:_run -- --grep=$MOCHA_GREP",
"test:unit:_run": "mocha --recursive --reporter spec $@ test/unit/js",
"test:acceptance:_run": "mocha --recursive --reporter spec --timeout 15000 --exit $@ test/acceptance/js",
"nodemon": "node --watch app.js",
"migrate": "knex migrate:latest",
"delete_old_chunks": "node storage/tasks/delete_old_chunks.js",
"fix_duplicate_versions": "node storage/tasks/fix_duplicate_versions.js",
"benchmarks": "node benchmarks/index.js",
"types:check": "tsc --noEmit"
}
}

View File

@@ -0,0 +1,25 @@
exports.BatchBlobStore = require('./lib/batch_blob_store')
exports.blobHash = require('./lib/blob_hash')
exports.HashCheckBlobStore = require('./lib/hash_check_blob_store')
exports.chunkBuffer = require('./lib/chunk_buffer')
exports.chunkStore = require('./lib/chunk_store')
exports.historyStore = require('./lib/history_store').historyStore
exports.knex = require('./lib/knex')
exports.mongodb = require('./lib/mongodb')
exports.redis = require('./lib/redis')
exports.persistChanges = require('./lib/persist_changes')
exports.persistor = require('./lib/persistor')
exports.ProjectArchive = require('./lib/project_archive')
exports.streams = require('./lib/streams')
exports.temp = require('./lib/temp')
exports.zipStore = require('./lib/zip_store')
const { BlobStore, loadGlobalBlobs } = require('./lib/blob_store')
exports.BlobStore = BlobStore
exports.loadGlobalBlobs = loadGlobalBlobs
const { InvalidChangeError } = require('./lib/errors')
exports.InvalidChangeError = InvalidChangeError
const { ChunkVersionConflictError } = require('./lib/chunk_store/errors')
exports.ChunkVersionConflictError = ChunkVersionConflictError
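Callers pull these in through the package entry point; a minimal sketch (require path and project id assumed):

const { BlobStore, loadGlobalBlobs } = require('../storage') // path assumed

async function example() {
  await loadGlobalBlobs() // must run before relying on the global blob list
  const blobStore = new BlobStore('000000000000000000000042') // example history id
  return blobStore
}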

View File

@@ -0,0 +1,76 @@
'use strict'
const OError = require('@overleaf/o-error')
const check = require('check-types')
const { Blob } = require('overleaf-editor-core')
const assert = check.assert
const MONGO_ID_REGEXP = /^[0-9a-f]{24}$/
const POSTGRES_ID_REGEXP = /^[1-9][0-9]{0,9}$/
const MONGO_OR_POSTGRES_ID_REGEXP = /^([0-9a-f]{24}|[1-9][0-9]{0,9})$/
function transaction(transaction, message) {
assert.function(transaction, message)
}
function blobHash(arg, message) {
try {
assert.match(arg, Blob.HEX_HASH_RX, message)
} catch (error) {
throw OError.tag(error, message, { arg })
}
}
/**
* A project id is a string that contains either an integer (for projects stored in Postgres) or 24
* hex digits (for projects stored in Mongo)
*/
function projectId(arg, message) {
try {
assert.match(arg, MONGO_OR_POSTGRES_ID_REGEXP, message)
} catch (error) {
throw OError.tag(error, message, { arg })
}
}
/**
* A chunk id is a string that contains either an integer (for projects stored in Postgres) or 24
* hex digits (for projects stored in Mongo)
*/
function chunkId(arg, message) {
try {
assert.match(arg, MONGO_OR_POSTGRES_ID_REGEXP, message)
} catch (error) {
throw OError.tag(error, message, { arg })
}
}
function mongoId(arg, message) {
try {
assert.match(arg, MONGO_ID_REGEXP, message)
} catch (error) {
throw OError.tag(error, message, { arg })
}
}
function postgresId(arg, message) {
try {
assert.match(arg, POSTGRES_ID_REGEXP, message)
} catch (error) {
throw OError.tag(error, message, { arg })
}
}
module.exports = {
...assert,
transaction,
blobHash,
projectId,
chunkId,
mongoId,
postgresId,
MONGO_ID_REGEXP,
POSTGRES_ID_REGEXP,
}
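A quick sketch of the id helpers (require path assumed): projectId accepts either id flavour, while mongoId and postgresId are strict.

const assert = require('./assert') // path assumed

assert.projectId('42', 'bad project id') // ok: Postgres-style integer id
assert.projectId('000000000000000000000042', 'bad project id') // ok: 24 hex digits (Mongo)
assert.mongoId('42', 'expected a mongo id') // throws: not 24 hex digits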

View File

@@ -0,0 +1,251 @@
// @ts-check
import { backupPersistor, projectBlobsBucket } from './backupPersistor.mjs'
import { GLOBAL_BLOBS, makeProjectKey, BlobStore } from './blob_store/index.js'
import Stream from 'node:stream'
import fs from 'node:fs'
import Crypto from 'node:crypto'
import assert from './assert.js'
import { backedUpBlobs, projects } from './mongodb.js'
import { Binary, ObjectId } from 'mongodb'
import logger from '@overleaf/logger/logging-manager.js'
import { AlreadyWrittenError } from '@overleaf/object-persistor/src/Errors.js'
import metrics from '@overleaf/metrics'
import zLib from 'node:zlib'
import Path from 'node:path'
const HIGHWATER_MARK = 1024 * 1024
/**
* @typedef {import("overleaf-editor-core").Blob} Blob
*/
/**
* @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor
*/
/**
* Increment a metric to record the outcome of a backup operation.
*
* @param {"success"|"failure"|"skipped"} status
* @param {"global"|"already_backed_up"|"none"} reason
*/
function recordBackupConclusion(status, reason = 'none') {
metrics.inc('blob_backed_up', 1, { status, reason })
}
/**
* Downloads a blob to a specified directory
*
* @param {string} historyId - The history ID of the project the blob belongs to
* @param {Blob} blob - The blob to download
* @param {string} tmpDir - The directory path where the blob will be downloaded
* @returns {Promise<string>} The full path where the blob was downloaded
*/
export async function downloadBlobToDir(historyId, blob, tmpDir) {
const blobStore = new BlobStore(historyId)
const blobHash = blob.getHash()
const src = await blobStore.getStream(blobHash)
const filePath = Path.join(tmpDir, `${historyId}-${blobHash}`)
try {
const dst = fs.createWriteStream(filePath, {
highWaterMark: HIGHWATER_MARK,
flags: 'wx',
})
await Stream.promises.pipeline(src, dst)
return filePath
} catch (error) {
try {
await fs.promises.unlink(filePath)
} catch {}
throw error
}
}
/**
* Performs the actual upload of the blob to the backup storage.
*
* @param {string} historyId - The history ID of the project the blob belongs to
* @param {Blob} blob - The blob being uploaded
* @param {string} path - The path to the file to upload (should have been stored on disk already)
* @return {Promise<void>}
*/
export async function uploadBlobToBackup(historyId, blob, path, persistor) {
const md5 = Crypto.createHash('md5')
const filePathCompressed = path + '.gz'
let backupSource
let contentEncoding
let size
try {
if (blob.getStringLength()) {
backupSource = filePathCompressed
contentEncoding = 'gzip'
size = 0
await Stream.promises.pipeline(
fs.createReadStream(path, { highWaterMark: HIGHWATER_MARK }),
zLib.createGzip(),
async function* (source) {
for await (const chunk of source) {
size += chunk.byteLength
md5.update(chunk)
yield chunk
}
},
fs.createWriteStream(filePathCompressed, {
highWaterMark: HIGHWATER_MARK,
})
)
} else {
backupSource = path
size = blob.getByteLength()
await Stream.promises.pipeline(
fs.createReadStream(path, { highWaterMark: HIGHWATER_MARK }),
md5
)
}
const key = makeProjectKey(historyId, blob.getHash())
await persistor.sendStream(
projectBlobsBucket,
key,
fs.createReadStream(backupSource, { highWaterMark: HIGHWATER_MARK }),
{
contentEncoding,
contentType: 'application/octet-stream',
contentLength: size,
sourceMd5: md5.digest('hex'),
ifNoneMatch: '*',
}
)
} finally {
if (backupSource === filePathCompressed) {
try {
await fs.promises.rm(filePathCompressed, { force: true })
} catch {}
}
}
}
/**
* Converts a legacy (postgres) historyId to a mongo projectId
*
* @param {string} historyId
* @return {Promise<string>}
* @private
*/
async function _convertLegacyHistoryIdToProjectId(historyId) {
const project = await projects.findOne(
{ 'overleaf.history.id': parseInt(historyId) },
{ projection: { _id: 1 } }
)
if (!project?._id) {
throw new Error('Did not find project for history id')
}
return project?._id?.toString()
}
/**
* Records that a blob was backed up for a project.
*
* @param {string} projectId - projectId for a project (mongo format)
* @param {string} hash
* @return {Promise<void>}
*/
export async function storeBlobBackup(projectId, hash) {
await backedUpBlobs.updateOne(
{ _id: new ObjectId(projectId) },
{ $addToSet: { blobs: new Binary(Buffer.from(hash, 'hex')) } },
{ upsert: true }
)
}
/**
* Determine whether a specific blob has been backed up in this project.
*
* @param {string} projectId
* @param {string} hash
* @return {Promise<*>}
* @private
*/
export async function _blobIsBackedUp(projectId, hash) {
const blobs = await backedUpBlobs.findOne(
{
_id: new ObjectId(projectId),
blobs: new Binary(Buffer.from(hash, 'hex')),
},
{ projection: { _id: 1 } }
)
return blobs?._id
}
/**
* Back up a blob to the global storage and record that it was backed up.
*
* @param {string} historyId - history ID for a project (can be postgres format or mongo format)
* @param {Blob} blob - The blob that is being backed up
* @param {string} tmpPath - The path to a temporary file storing the contents of the blob.
* @param {CachedPerProjectEncryptedS3Persistor} [persistor] - The persistor to use (optional)
* @return {Promise<void>}
*/
export async function backupBlob(historyId, blob, tmpPath, persistor) {
const hash = blob.getHash()
let projectId = historyId
if (assert.POSTGRES_ID_REGEXP.test(historyId)) {
projectId = await _convertLegacyHistoryIdToProjectId(historyId)
}
const globalBlob = GLOBAL_BLOBS.get(hash)
if (globalBlob && !globalBlob.demoted) {
recordBackupConclusion('skipped', 'global')
logger.debug({ projectId, hash }, 'Blob is global - skipping backup')
return
}
try {
if (await _blobIsBackedUp(projectId, hash)) {
recordBackupConclusion('skipped', 'already_backed_up')
logger.debug(
{ projectId, hash },
'Blob already backed up - skipping backup'
)
return
}
} catch (error) {
logger.warn({ error }, 'Failed to check if blob is backed up')
// We'll try anyway - we'll catch the error if it was backed up
}
// If we weren't passed a persistor for this project, create one.
// This will fetch the key from AWS, so it's preferable to use
// the same persistor for all blobs in a project where possible.
if (!persistor) {
logger.debug(
{ historyId, hash },
'warning: persistor not passed to backupBlob'
)
}
persistor ??= await backupPersistor.forProject(
projectBlobsBucket,
makeProjectKey(historyId, '')
)
try {
logger.debug({ projectId, hash }, 'Starting blob backup')
await uploadBlobToBackup(historyId, blob, tmpPath, persistor)
await storeBlobBackup(projectId, hash)
recordBackupConclusion('success')
} catch (error) {
if (error instanceof AlreadyWrittenError) {
logger.debug({ error, projectId, hash }, 'Blob already backed up')
// record that we backed it up already
await storeBlobBackup(projectId, hash)
recordBackupConclusion('failure', 'already_backed_up')
return
}
// eventually queue this for retry - for now this will be fixed by running the script
recordBackupConclusion('failure')
logger.warn({ error, projectId, hash }, 'Failed to upload blob to backup')
} finally {
logger.debug({ projectId, hash }, 'Ended blob backup')
}
}
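A usage sketch (import paths, the hash, and the temp directory are assumptions): back up one blob with a per-project persistor, which is preferable when several blobs from the same project are processed because the data encryption key is fetched only once.

import { BlobStore, makeProjectKey } from './blob_store/index.js'
import { backupPersistor, projectBlobsBucket } from './backupPersistor.mjs'
import { backupBlob, downloadBlobToDir } from './backupBlob.mjs' // file name assumed

const historyId = '000000000000000000000042' // example project
const hash = 'f70d7bba4ae1f07682e0358bd7a2068094fc023b' // example blob hash
const blobStore = new BlobStore(historyId)
const [blob] = await blobStore.getBlobs([hash])

// Reuse one persistor (and one DEK fetch) for all blobs in the project.
const persistor = await backupPersistor.forProject(
  projectBlobsBucket,
  makeProjectKey(historyId, '')
)
const tmpPath = await downloadBlobToDir(historyId, blob, '/tmp') // tmpDir assumed
await backupBlob(historyId, blob, tmpPath, persistor)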

View File

@@ -0,0 +1,93 @@
// @ts-check
import { callbackify } from 'util'
import { ObjectId } from 'mongodb'
import config from 'config'
import OError from '@overleaf/o-error'
import { db } from './mongodb.js'
import projectKey from './project_key.js'
import chunkStore from '../lib/chunk_store/index.js'
import {
backupPersistor,
chunksBucket,
projectBlobsBucket,
} from './backupPersistor.mjs'
const MS_PER_DAY = 24 * 60 * 60 * 1000
const EXPIRE_PROJECTS_AFTER_MS =
parseInt(config.get('minSoftDeletionPeriodDays'), 10) * MS_PER_DAY
const deletedProjectsCollection = db.collection('deletedProjects')
/**
* @param {string} historyId
* @return {Promise<boolean>}
*/
async function projectHasLatestChunk(historyId) {
const chunk = await chunkStore.getBackend(historyId).getLatestChunk(historyId)
return chunk != null
}
export class NotReadyToDelete extends OError {}
/**
* @param {string} projectId
* @return {Promise<void>}
*/
async function deleteProjectBackup(projectId) {
const deletedProject = await deletedProjectsCollection.findOne(
{ 'deleterData.deletedProjectId': new ObjectId(projectId) },
{
projection: {
'deleterData.deletedProjectOverleafHistoryId': 1,
'deleterData.deletedAt': 1,
},
}
)
if (!deletedProject) {
throw new NotReadyToDelete('refusing to delete non-deleted project')
}
const expiresAt =
deletedProject.deleterData.deletedAt.getTime() + EXPIRE_PROJECTS_AFTER_MS
if (expiresAt > Date.now()) {
throw new NotReadyToDelete('refusing to delete non-expired project')
}
const historyId =
deletedProject.deleterData.deletedProjectOverleafHistoryId?.toString()
if (!historyId) {
throw new NotReadyToDelete(
'refusing to delete project with unknown historyId'
)
}
if (await projectHasLatestChunk(historyId)) {
throw new NotReadyToDelete(
'refusing to delete project with remaining chunks'
)
}
const prefix = projectKey.format(historyId) + '/'
await backupPersistor.deleteDirectory(chunksBucket, prefix)
await backupPersistor.deleteDirectory(projectBlobsBucket, prefix)
}
export async function healthCheck() {
const HEALTH_CHECK_PROJECTS = JSON.parse(config.get('healthCheckProjects'))
if (HEALTH_CHECK_PROJECTS.length !== 2) {
throw new Error('expected 2 healthCheckProjects')
}
if (!HEALTH_CHECK_PROJECTS.some(id => id.length === 24)) {
throw new Error('expected mongo id in healthCheckProjects')
}
if (!HEALTH_CHECK_PROJECTS.some(id => id.length < 24)) {
throw new Error('expected postgres id in healthCheckProjects')
}
for (const historyId of HEALTH_CHECK_PROJECTS) {
if (!(await projectHasLatestChunk(historyId))) {
throw new Error(`project has no history: ${historyId}`)
}
}
}
export const healthCheckCb = callbackify(healthCheck)
export const deleteProjectBackupCb = callbackify(deleteProjectBackup)

View File

@@ -0,0 +1,152 @@
/**
* Provides a generator function to back up project chunks and blobs.
*/
import chunkStore from './chunk_store/index.js'
import {
GLOBAL_BLOBS, // NOTE: must call loadGlobalBlobs() before using this
BlobStore,
} from './blob_store/index.js'
import assert from './assert.js'
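/**
 * Seeds seenBlobs with the blob hashes of the previous chunk when the current
 * chunk's snapshot has not been backed up yet, so those blobs are not uploaded again.
 *
 * @param {string} projectId
 * @param {Object} chunk - The chunk record (with startVersion) being backed up
 * @param {number} lastBackedUpVersion
 * @param {Set<string>} seenBlobs - Set to collect previously seen blob hashes
 */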
async function lookBehindForSeenBlobs(
projectId,
chunk,
lastBackedUpVersion,
seenBlobs
) {
if (chunk.startVersion === 0) {
return // this is the first chunk, no need to check for blobs in the previous chunk
}
if (chunk.startVersion > 0 && lastBackedUpVersion > chunk.startVersion) {
return // the snapshot in this chunk has already been backed up
}
if (
chunk.startVersion > 0 &&
lastBackedUpVersion === chunk.startVersion // same as previousChunk.endVersion
) {
// the snapshot in this chunk has not been backed up
// so we find the set of backed up blobs from the previous chunk
const previousChunk = await chunkStore.loadAtVersion(
projectId,
lastBackedUpVersion
)
const previousChunkHistory = previousChunk.getHistory()
previousChunkHistory.findBlobHashes(seenBlobs)
}
}
/**
* Records blob hashes that have been previously seen in a chunk's history.
*
* @param {Object} chunk - The chunk containing history data
* @param {number} currentBackedUpVersion - The version number that has been backed up
* @param {Set<string>} seenBlobs - Set to collect previously seen blob hashes
* @returns {void}
*/
function recordPreviouslySeenBlobs(chunk, currentBackedUpVersion, seenBlobs) {
// We need to look at the chunk and decide how far we have backed up.
// If we have not backed up this chunk at all, we need to backup the blobs
// in the snapshot. Otherwise we need to backup the blobs in the changes
// that have occurred since the last backup.
const history = chunk.getHistory()
const startVersion = chunk.getStartVersion()
if (currentBackedUpVersion === 0) {
// If we have only backed up version 0 (i.e. the first change)
// then that includes the initial snapshot, so we consider
// the blobs of the initial snapshot as seen. If the project
// has not been backed up at all then currentBackedUpVersion
// will be undefined.
history.snapshot.findBlobHashes(seenBlobs)
} else if (currentBackedUpVersion > startVersion) {
history.snapshot.findBlobHashes(seenBlobs)
for (let i = 0; i < currentBackedUpVersion - startVersion; i++) {
history.changes[i].findBlobHashes(seenBlobs)
}
}
}
/**
* Collects new blob objects that need to be backed up from a given chunk.
*
* @param {Object} chunk - The chunk object containing history data
* @param {Object} blobStore - Storage interface for retrieving blobs
* @param {Set<string>} seenBlobs - Set of blob hashes that have already been processed
* @returns {Promise<Object[]>} Array of blob objects that need to be backed up
* @throws {Error} If blob retrieval fails
*/
async function collectNewBlobsForBackup(chunk, blobStore, seenBlobs) {
/** @type {Set<string>} */
const blobHashes = new Set()
const history = chunk.getHistory()
// Get all the blobs in this chunk, then exclude the seenBlobs and global blobs
history.findBlobHashes(blobHashes)
const blobsToBackup = await blobStore.getBlobs(
[...blobHashes].filter(
hash =>
hash &&
!seenBlobs.has(hash) &&
(!GLOBAL_BLOBS.has(hash) || GLOBAL_BLOBS.get(hash).demoted)
)
)
return blobsToBackup
}
/**
* Asynchronously generates backups for a project based on provided versions.
* @param {string} projectId - The ID of the project's history to back up.
* @param {number} lastBackedUpVersion - The last version that was successfully backed up.
* @yields {AsyncGenerator<{ chunkRecord: object, chunkToBackup: object, chunkBuffer: Buffer, blobsToBackup: object[] }>}
* Yields chunk records and corresponding data needed for backups.
*/
export async function* backupGenerator(projectId, lastBackedUpVersion) {
assert.projectId(projectId, 'bad projectId')
assert.maybe.integer(lastBackedUpVersion, 'bad lastBackedUpVersion')
const blobStore = new BlobStore(projectId)
/** @type {Set<string>} */
const seenBlobs = new Set() // records the blobs that are already backed up
const firstPendingVersion =
lastBackedUpVersion >= 0 ? lastBackedUpVersion + 1 : 0
let isStartingChunk = true
let currentBackedUpVersion = lastBackedUpVersion
const chunkRecordIterator = chunkStore.getProjectChunksFromVersion(
projectId,
firstPendingVersion
)
for await (const chunkRecord of chunkRecordIterator) {
const { chunk, chunkBuffer } = await chunkStore.loadByChunkRecord(
projectId,
chunkRecord
)
if (isStartingChunk) {
await lookBehindForSeenBlobs(
projectId,
chunkRecord,
lastBackedUpVersion,
seenBlobs
)
isStartingChunk = false
}
recordPreviouslySeenBlobs(chunk, currentBackedUpVersion, seenBlobs)
const blobsToBackup = await collectNewBlobsForBackup(
chunk,
blobStore,
seenBlobs
)
yield { chunkRecord, chunkToBackup: chunk, chunkBuffer, blobsToBackup }
// After we generate a backup of this chunk, mark the backed up blobs as seen
blobsToBackup.forEach(blob => seenBlobs.add(blob.getHash()))
currentBackedUpVersion = chunkRecord.endVersion
}
}
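// Illustrative consumption of the generator (persistBlob and persistChunk are
// hypothetical callbacks standing in for the real backup writers):
//
//   for await (const {
//     chunkRecord,
//     chunkBuffer,
//     blobsToBackup,
//   } of backupGenerator(projectId, lastBackedUpVersion)) {
//     for (const blob of blobsToBackup) await persistBlob(blob)
//     await persistChunk(chunkRecord, chunkBuffer)
//   }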

View File

@@ -0,0 +1,121 @@
// @ts-check
import fs from 'node:fs'
import Path from 'node:path'
import _ from 'lodash'
import config from 'config'
import { SecretManagerServiceClient } from '@google-cloud/secret-manager'
import OError from '@overleaf/o-error'
import {
PerProjectEncryptedS3Persistor,
RootKeyEncryptionKey,
} from '@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor.js'
import { HistoryStore } from './history_store.js'
const persistorConfig = _.cloneDeep(config.get('backupPersistor'))
const { chunksBucket, deksBucket, globalBlobsBucket, projectBlobsBucket } =
config.get('backupStore')
export { chunksBucket, globalBlobsBucket, projectBlobsBucket }
function convertKey(key, convertFn) {
if (_.has(persistorConfig, key)) {
_.update(persistorConfig, key, convertFn)
}
}
convertKey('s3SSEC.httpOptions.timeout', s => parseInt(s, 10))
convertKey('s3SSEC.maxRetries', s => parseInt(s, 10))
convertKey('s3SSEC.pathStyle', s => s === 'true')
// array of CA, either inlined or on disk
convertKey('s3SSEC.ca', s =>
JSON.parse(s).map(ca => (ca.startsWith('/') ? fs.readFileSync(ca) : ca))
)
/** @type {() => Promise<string>} */
let getRawRootKeyEncryptionKeys
if ((process.env.NODE_ENV || 'production') === 'production') {
;[persistorConfig.s3SSEC.key, persistorConfig.s3SSEC.secret] = (
await loadFromSecretsManager(
process.env.BACKUP_AWS_CREDENTIALS || '',
'BACKUP_AWS_CREDENTIALS'
)
).split(':')
getRawRootKeyEncryptionKeys = () =>
loadFromSecretsManager(
persistorConfig.keyEncryptionKeys,
'BACKUP_KEY_ENCRYPTION_KEYS'
)
} else {
getRawRootKeyEncryptionKeys = () => persistorConfig.keyEncryptionKeys
}
export const DELETION_ONLY = persistorConfig.keyEncryptionKeys === 'none'
if (DELETION_ONLY) {
  // For the backup-deleter process: it must not encrypt or read data, and
  // deleting does not need a key.
getRawRootKeyEncryptionKeys = () => new Promise(_resolve => {})
}
const PROJECT_FOLDER_REGEX =
/^\d{3}\/\d{3}\/\d{3,}\/|[0-9a-f]{3}\/[0-9a-f]{3}\/[0-9a-f]{18}\/$/
/**
* @param {string} bucketName
* @param {string} path
* @return {string}
*/
export function pathToProjectFolder(bucketName, path) {
  switch (bucketName) {
    case deksBucket:
    case chunksBucket:
    case projectBlobsBucket: {
      const projectFolder = Path.join(...path.split('/').slice(0, 3)) + '/'
      if (!PROJECT_FOLDER_REGEX.test(projectFolder)) {
        throw new OError('invalid project folder', { bucketName, path })
      }
      return projectFolder
    }
    default:
      throw new Error(`${bucketName} does not store per-project files`)
  }
}
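// Illustrative mappings (the paths are made up): for the chunks bucket,
// pathToProjectFolder(chunksBucket, '123/456/789/000000012') returns
// '123/456/789/', while a Mongo-style key such as
// 'abc/def/0123456789abcdef01/aa/bb...' maps to 'abc/def/0123456789abcdef01/'.
// Folders that match neither pattern fail the PROJECT_FOLDER_REGEX check and
// throw.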
/**
* @param {string} name
* @param {string} label
* @return {Promise<string>}
*/
async function loadFromSecretsManager(name, label) {
const client = new SecretManagerServiceClient()
const [version] = await client.accessSecretVersion({ name })
if (!version.payload?.data) throw new Error(`empty secret: ${label}`)
return version.payload.data.toString()
}
async function getRootKeyEncryptionKeys() {
return JSON.parse(await getRawRootKeyEncryptionKeys()).map(
({ key, salt }) => {
return new RootKeyEncryptionKey(
Buffer.from(key, 'base64'),
Buffer.from(salt, 'base64')
)
}
)
}
export const backupPersistor = new PerProjectEncryptedS3Persistor({
...persistorConfig.s3SSEC,
disableMultiPartUpload: true,
dataEncryptionKeyBucketName: deksBucket,
pathToProjectFolder,
getRootKeyEncryptionKeys,
storageClass: {
[deksBucket]: 'STANDARD',
[chunksBucket]: persistorConfig.tieringStorageClass,
[projectBlobsBucket]: persistorConfig.tieringStorageClass,
},
})
export const backupHistoryStore = new HistoryStore(
backupPersistor,
chunksBucket
)

View File

@@ -0,0 +1,216 @@
// @ts-check
import OError from '@overleaf/o-error'
import chunkStore from '../lib/chunk_store/index.js'
import {
backupPersistor,
chunksBucket,
projectBlobsBucket,
} from './backupPersistor.mjs'
import { Blob, Chunk, History } from 'overleaf-editor-core'
import { BlobStore, GLOBAL_BLOBS, makeProjectKey } from './blob_store/index.js'
import blobHash from './blob_hash.js'
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
import logger from '@overleaf/logger'
import path from 'node:path'
import projectKey from './project_key.js'
import streams from './streams.js'
import objectPersistor from '@overleaf/object-persistor'
import { getEndDateForRPO } from '../../backupVerifier/utils.mjs'
/**
* @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor.js").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor
*/
/**
* @param {string} historyId
* @param {string} hash
*/
export async function verifyBlob(historyId, hash) {
return await verifyBlobs(historyId, [hash])
}
/**
*
* @param {string} historyId
* @return {Promise<CachedPerProjectEncryptedS3Persistor>}
*/
async function getProjectPersistor(historyId) {
try {
return await backupPersistor.forProjectRO(
projectBlobsBucket,
makeProjectKey(historyId, '')
)
} catch (err) {
if (err instanceof NotFoundError) {
throw new BackupCorruptedError('dek does not exist', {}, err)
}
throw err
}
}
/**
* @param {string} historyId
* @param {Array<string>} hashes
* @param {CachedPerProjectEncryptedS3Persistor} [projectCache]
*/
export async function verifyBlobs(historyId, hashes, projectCache) {
if (hashes.length === 0) throw new Error('bug: empty hashes')
if (!projectCache) {
projectCache = await getProjectPersistor(historyId)
}
const blobStore = new BlobStore(historyId)
for (const hash of hashes) {
const path = makeProjectKey(historyId, hash)
const blob = await blobStore.getBlob(hash)
if (!blob) throw new Blob.NotFoundError(hash)
let stream
try {
stream = await projectCache.getObjectStream(projectBlobsBucket, path, {
autoGunzip: true,
})
} catch (err) {
if (err instanceof NotFoundError) {
throw new BackupCorruptedMissingBlobError('missing blob', {
path,
hash,
})
}
throw err
}
const backupHash = await blobHash.fromStream(blob.getByteLength(), stream)
if (backupHash !== hash) {
throw new BackupCorruptedInvalidBlobError(
'hash mismatch for backed up blob',
{
path,
hash,
backupHash,
}
)
}
}
}
/**
* @param {string} historyId
* @param {Date} [endTimestamp]
*/
export async function verifyProjectWithErrorContext(
historyId,
endTimestamp = getEndDateForRPO()
) {
try {
await verifyProject(historyId, endTimestamp)
} catch (err) {
// @ts-ignore err is Error instance
throw OError.tag(err, 'verifyProject', { historyId, endTimestamp })
}
}
/**
*
* @param {string} historyId
* @param {number} startVersion
* @param {CachedPerProjectEncryptedS3Persistor} backupPersistorForProject
* @return {Promise<any>}
*/
async function loadChunk(historyId, startVersion, backupPersistorForProject) {
const key = path.join(
projectKey.format(historyId),
projectKey.pad(startVersion)
)
try {
const buf = await streams.gunzipStreamToBuffer(
await backupPersistorForProject.getObjectStream(chunksBucket, key)
)
return JSON.parse(buf.toString('utf-8'))
} catch (err) {
if (err instanceof objectPersistor.Errors.NotFoundError) {
throw new Chunk.NotPersistedError(historyId)
}
if (err instanceof Error) {
throw OError.tag(err, 'Failed to load chunk', { historyId, startVersion })
}
throw err
}
}
/**
* @param {string} historyId
* @param {Date} endTimestamp
*/
export async function verifyProject(historyId, endTimestamp) {
const backend = chunkStore.getBackend(historyId)
const [first, last] = await Promise.all([
backend.getFirstChunkBeforeTimestamp(historyId, endTimestamp),
backend.getLastActiveChunkBeforeTimestamp(historyId, endTimestamp),
])
  const chunksRecordsToVerify = [
    {
      chunkId: first.id,
      chunkLabel: 'first',
      startVersion: first.startVersion,
    },
  ]
  if (first.startVersion !== last.startVersion) {
    chunksRecordsToVerify.push({
      chunkId: last.id,
      chunkLabel: 'last before RPO',
      startVersion: last.startVersion,
    })
  }
const projectCache = await getProjectPersistor(historyId)
const chunks = await Promise.all(
chunksRecordsToVerify.map(async chunk => {
try {
return History.fromRaw(
await loadChunk(historyId, chunk.startVersion, projectCache)
)
} catch (err) {
if (err instanceof Chunk.NotPersistedError) {
throw new BackupRPOViolationChunkNotBackedUpError(
'BackupRPOviolation: chunk not backed up',
chunk
)
}
throw err
}
})
)
const seenBlobs = new Set()
const blobsToVerify = []
for (const chunk of chunks) {
/** @type {Set<string>} */
const chunkBlobs = new Set()
chunk.findBlobHashes(chunkBlobs)
let hasAddedBlobFromThisChunk = false
for (const blobHash of chunkBlobs) {
if (seenBlobs.has(blobHash)) continue // old blob
if (GLOBAL_BLOBS.has(blobHash)) continue // global blob
seenBlobs.add(blobHash)
if (!hasAddedBlobFromThisChunk) {
blobsToVerify.push(blobHash)
hasAddedBlobFromThisChunk = true
}
}
}
if (blobsToVerify.length === 0) {
logger.debug(
{
historyId,
chunksRecordsToVerify: chunksRecordsToVerify.map(c => c.chunkId),
},
'chunks contain no blobs to verify'
)
return
}
await verifyBlobs(historyId, blobsToVerify, projectCache)
}
export class BackupCorruptedError extends OError {}
export class BackupRPOViolationError extends OError {}
export class BackupCorruptedMissingBlobError extends BackupCorruptedError {}
export class BackupCorruptedInvalidBlobError extends BackupCorruptedError {}
export class BackupRPOViolationChunkNotBackedUpError extends OError {}

View File

@@ -0,0 +1,212 @@
const { Binary, ObjectId } = require('mongodb')
const { projects, backedUpBlobs } = require('../mongodb')
const OError = require('@overleaf/o-error')
// List projects with pending backups older than the specified interval
function listPendingBackups(timeIntervalMs = 0, limit = null) {
const cutoffTime = new Date(Date.now() - timeIntervalMs)
const options = {
projection: { 'overleaf.backup.pendingChangeAt': 1 },
sort: { 'overleaf.backup.pendingChangeAt': 1 },
}
// Apply limit if provided
if (limit) {
options.limit = limit
}
const cursor = projects.find(
{
'overleaf.backup.pendingChangeAt': {
$exists: true,
$lt: cutoffTime,
},
},
options
)
return cursor
}
// List projects that have never been backed up and are older than the specified interval
function listUninitializedBackups(timeIntervalMs = 0, limit = null) {
  const cutoffTimeInSeconds = Math.floor((Date.now() - timeIntervalMs) / 1000)
const options = {
projection: { _id: 1 },
sort: { _id: 1 },
}
// Apply limit if provided
if (limit) {
options.limit = limit
}
const cursor = projects.find(
{
'overleaf.backup.lastBackedUpVersion': null,
_id: {
$lt: ObjectId.createFromTime(cutoffTimeInSeconds),
},
},
options
)
return cursor
}
// Retrieve the history ID for a given project without giving direct access to the
// projects collection.
async function getHistoryId(projectId) {
const project = await projects.findOne(
{ _id: new ObjectId(projectId) },
{
projection: {
'overleaf.history.id': 1,
},
}
)
if (!project) {
throw new Error('Project not found')
}
return project.overleaf.history.id
}
async function getBackupStatus(projectId) {
const project = await projects.findOne(
{ _id: new ObjectId(projectId) },
{
projection: {
'overleaf.history': 1,
'overleaf.backup': 1,
},
}
)
if (!project) {
throw new Error('Project not found')
}
return {
backupStatus: project.overleaf.backup,
historyId: `${project.overleaf.history.id}`,
currentEndVersion: project.overleaf.history.currentEndVersion,
currentEndTimestamp: project.overleaf.history.currentEndTimestamp,
}
}
async function setBackupVersion(
projectId,
previousBackedUpVersion,
currentBackedUpVersion,
currentBackedUpAt
) {
// FIXME: include a check to handle race conditions
// to make sure only one process updates the version numbers
const result = await projects.updateOne(
{
_id: new ObjectId(projectId),
'overleaf.backup.lastBackedUpVersion': previousBackedUpVersion,
},
{
$set: {
'overleaf.backup.lastBackedUpVersion': currentBackedUpVersion,
'overleaf.backup.lastBackedUpAt': currentBackedUpAt,
},
}
)
if (result.matchedCount === 0 || result.modifiedCount === 0) {
throw new OError('Failed to update backup version', {
previousBackedUpVersion,
currentBackedUpVersion,
currentBackedUpAt,
result,
})
}
}
async function updateCurrentMetadataIfNotSet(projectId, latestChunkMetadata) {
await projects.updateOne(
{
_id: new ObjectId(projectId),
'overleaf.history.currentEndVersion': { $exists: false },
'overleaf.history.currentEndTimestamp': { $exists: false },
},
{
$set: {
'overleaf.history.currentEndVersion': latestChunkMetadata.endVersion,
'overleaf.history.currentEndTimestamp':
latestChunkMetadata.endTimestamp,
},
}
)
}
/**
* Updates the pending change timestamp for a project's backup status
* @param {string} projectId - The ID of the project to update
* @param {Date} backupStartTime - The timestamp to set for pending changes
* @returns {Promise<void>}
*
* If the project's last backed up version matches the current end version,
* the pending change timestamp is removed. Otherwise, it's set to the provided
* backup start time.
*/
async function updatePendingChangeTimestamp(projectId, backupStartTime) {
await projects.updateOne({ _id: new ObjectId(projectId) }, [
{
$set: {
'overleaf.backup.pendingChangeAt': {
$cond: {
if: {
$eq: [
'$overleaf.backup.lastBackedUpVersion',
'$overleaf.history.currentEndVersion',
],
},
then: '$$REMOVE',
else: backupStartTime,
},
},
},
},
])
}
async function getBackedUpBlobHashes(projectId) {
const result = await backedUpBlobs.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { blobs: 1 } }
)
if (!result) {
return new Set()
}
const hashes = result.blobs.map(b => b.buffer.toString('hex'))
return new Set(hashes)
}
async function unsetBackedUpBlobHashes(projectId, hashes) {
const binaryHashes = hashes.map(h => new Binary(Buffer.from(h, 'hex')))
const result = await backedUpBlobs.findOneAndUpdate(
{ _id: new ObjectId(projectId) },
{
$pullAll: {
blobs: binaryHashes,
},
},
{ returnDocument: 'after' }
)
if (result && result.blobs.length === 0) {
await backedUpBlobs.deleteOne({
_id: new ObjectId(projectId),
blobs: { $size: 0 },
})
}
return result
}
module.exports = {
getHistoryId,
getBackupStatus,
setBackupVersion,
updateCurrentMetadataIfNotSet,
updatePendingChangeTimestamp,
listPendingBackups,
listUninitializedBackups,
getBackedUpBlobHashes,
unsetBackedUpBlobHashes,
}

View File

@@ -0,0 +1,40 @@
'use strict'
const BPromise = require('bluebird')
/**
* @constructor
* @param {BlobStore} blobStore
* @classdesc
* Wrapper for BlobStore that pre-fetches blob metadata to avoid making one
* database call per blob lookup.
*/
function BatchBlobStore(blobStore) {
this.blobStore = blobStore
this.blobs = new Map()
}
/**
* Pre-fetch metadata for the given blob hashes.
*
* @param {Array.<string>} hashes
* @return {Promise}
*/
BatchBlobStore.prototype.preload = function batchBlobStorePreload(hashes) {
return BPromise.each(this.blobStore.getBlobs(hashes), blob => {
this.blobs.set(blob.getHash(), blob)
})
}
/**
* @see BlobStore#getBlob
*/
BatchBlobStore.prototype.getBlob = BPromise.method(
function batchBlobStoreGetBlob(hash) {
const blob = this.blobs.get(hash)
if (blob) return blob
return this.blobStore.getBlob(hash)
}
)
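// Illustrative usage (projectId and the hashes are placeholders; the wrapped
// BlobStore is constructed elsewhere):
//
//   const batchBlobStore = new BatchBlobStore(new BlobStore(projectId))
//   await batchBlobStore.preload([hashA, hashB])
//   const blobA = await batchBlobStore.getBlob(hashA) // served from the cache
//   const blobC = await batchBlobStore.getBlob(hashC) // falls back to the BlobStore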
module.exports = BatchBlobStore

View File

@@ -0,0 +1,80 @@
/** @module */
'use strict'
const BPromise = require('bluebird')
const fs = BPromise.promisifyAll(require('node:fs'))
const crypto = require('node:crypto')
const { pipeline } = require('node:stream')
const assert = require('./assert')
function getGitBlobHeader(byteLength) {
return 'blob ' + byteLength + '\x00'
}
function getBlobHash(byteLength) {
const hash = crypto.createHash('sha1')
hash.setEncoding('hex')
hash.update(getGitBlobHeader(byteLength))
return hash
}
/**
* Compute the git blob hash for a blob from a readable stream of its content.
*
* @function
* @param {number} byteLength
* @param {stream.Readable} stream
* @return {Promise.<string>} hexadecimal SHA-1 hash
*/
exports.fromStream = BPromise.method(
function blobHashFromStream(byteLength, stream) {
assert.integer(byteLength, 'blobHash: bad byteLength')
assert.object(stream, 'blobHash: bad stream')
const hash = getBlobHash(byteLength)
return new BPromise(function (resolve, reject) {
pipeline(stream, hash, function (err) {
if (err) {
reject(err)
} else {
hash.end()
resolve(hash.read())
}
})
})
}
)
/**
* Compute the git blob hash for a blob with the given string content.
*
* @param {string} string
* @return {string} hexadecimal SHA-1 hash
*/
exports.fromString = function blobHashFromString(string) {
assert.string(string, 'blobHash: bad string')
const hash = getBlobHash(Buffer.byteLength(string))
hash.update(string, 'utf8')
hash.end()
return hash.read()
}
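// For example, fromString('') hashes only the header 'blob 0\x00' and returns
// 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391', the same value produced by
// `git hash-object` on an empty file.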
/**
* Compute the git blob hash for the content of a file
*
 * @param {string} pathname
* @return {string} hexadecimal SHA-1 hash
*/
exports.fromFile = function blobHashFromFile(pathname) {
assert.string(pathname, 'blobHash: bad pathname')
function getByteLengthOfFile() {
return fs.statAsync(pathname).then(stat => stat.size)
}
const fromStream = this.fromStream
return getByteLengthOfFile(pathname).then(function (byteLength) {
const stream = fs.createReadStream(pathname)
return fromStream(byteLength, stream)
})
}

View File

@@ -0,0 +1,433 @@
'use strict'
const config = require('config')
const fs = require('node:fs')
const isValidUtf8 = require('utf-8-validate')
const { ReadableString } = require('@overleaf/stream-utils')
const core = require('overleaf-editor-core')
const objectPersistor = require('@overleaf/object-persistor')
const OError = require('@overleaf/o-error')
const Blob = core.Blob
const TextOperation = core.TextOperation
const containsNonBmpChars = core.util.containsNonBmpChars
const assert = require('../assert')
const blobHash = require('../blob_hash')
const mongodb = require('../mongodb')
const persistor = require('../persistor')
const projectKey = require('../project_key')
const streams = require('../streams')
const postgresBackend = require('./postgres')
const mongoBackend = require('./mongo')
const logger = require('@overleaf/logger')
/** @import { Readable } from 'stream' */
const GLOBAL_BLOBS = new Map()
function makeGlobalKey(hash) {
return `${hash.slice(0, 2)}/${hash.slice(2, 4)}/${hash.slice(4)}`
}
function makeProjectKey(projectId, hash) {
return `${projectKey.format(projectId)}/${hash.slice(0, 2)}/${hash.slice(2)}`
}
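// Illustrative key layout (hash is a placeholder): makeGlobalKey('aabbcc...')
// yields 'aa/bb/cc...', while makeProjectKey(projectId, 'aabbcc...') yields
// '<project prefix>/aa/bbcc...', where the project prefix comes from
// projectKey.format(), defined elsewhere.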
async function uploadBlob(projectId, blob, stream, opts = {}) {
const bucket = config.get('blobStore.projectBucket')
const key = makeProjectKey(projectId, blob.getHash())
logger.debug({ projectId, blob }, 'uploadBlob started')
try {
await persistor.sendStream(bucket, key, stream, {
contentType: 'application/octet-stream',
...opts,
})
} finally {
logger.debug({ projectId, blob }, 'uploadBlob finished')
}
}
function getBlobLocation(projectId, hash) {
if (GLOBAL_BLOBS.has(hash)) {
return {
bucket: config.get('blobStore.globalBucket'),
key: makeGlobalKey(hash),
}
} else {
return {
bucket: config.get('blobStore.projectBucket'),
key: makeProjectKey(projectId, hash),
}
}
}
/**
* Returns the appropriate backend for the given project id
*
* Numeric ids use the Postgres backend.
* Strings of 24 characters use the Mongo backend.
*/
function getBackend(projectId) {
if (assert.POSTGRES_ID_REGEXP.test(projectId)) {
return postgresBackend
} else if (assert.MONGO_ID_REGEXP.test(projectId)) {
return mongoBackend
} else {
throw new OError('bad project id', { projectId })
}
}
async function makeBlobForFile(pathname) {
const { size: byteLength } = await fs.promises.stat(pathname)
const hash = await blobHash.fromStream(
byteLength,
fs.createReadStream(pathname)
)
return new Blob(hash, byteLength)
}
async function getStringLengthOfFile(byteLength, pathname) {
// We have to read the file into memory to get its UTF-8 length, so don't
// bother for files that are too large for us to edit anyway.
if (byteLength > Blob.MAX_EDITABLE_BYTE_LENGTH_BOUND) {
return null
}
// We need to check if the file contains nonBmp or null characters
let data = await fs.promises.readFile(pathname)
if (!isValidUtf8(data)) return null
data = data.toString()
if (data.length > TextOperation.MAX_STRING_LENGTH) return null
if (containsNonBmpChars(data)) return null
if (data.indexOf('\x00') !== -1) return null
return data.length
}
async function deleteBlobsInBucket(projectId) {
const bucket = config.get('blobStore.projectBucket')
const prefix = `${projectKey.format(projectId)}/`
logger.debug({ projectId }, 'deleteBlobsInBucket started')
try {
await persistor.deleteDirectory(bucket, prefix)
} finally {
logger.debug({ projectId }, 'deleteBlobsInBucket finished')
}
}
async function loadGlobalBlobs() {
const blobs = await mongodb.globalBlobs.find()
for await (const blob of blobs) {
GLOBAL_BLOBS.set(blob._id, {
blob: new Blob(blob._id, blob.byteLength, blob.stringLength),
demoted: Boolean(blob.demoted),
})
}
}
/**
* Return metadata for all blobs in the given project
* @param {Array<string|number>} projectIds
* @return {Promise<{nBlobs:number, blobs:Map<string,Array<core.Blob>>}>}
*/
async function getProjectBlobsBatch(projectIds) {
const mongoProjects = []
const postgresProjects = []
for (const projectId of projectIds) {
if (typeof projectId === 'number') {
postgresProjects.push(projectId)
} else {
mongoProjects.push(projectId)
}
}
const [
{ nBlobs: nBlobsPostgres, blobs: blobsPostgres },
{ nBlobs: nBlobsMongo, blobs: blobsMongo },
] = await Promise.all([
postgresBackend.getProjectBlobsBatch(postgresProjects),
mongoBackend.getProjectBlobsBatch(mongoProjects),
])
for (const [id, blobs] of blobsPostgres.entries()) {
blobsMongo.set(id.toString(), blobs)
}
return { nBlobs: nBlobsPostgres + nBlobsMongo, blobs: blobsMongo }
}
/**
* @classdesc
* Fetch and store the content of files using content-addressable hashing. The
* blob store manages both content and metadata (byte and UTF-8 length) for
* blobs.
*/
class BlobStore {
/**
* @constructor
* @param {string} projectId the project for which we'd like to find blobs
*/
constructor(projectId) {
assert.projectId(projectId)
this.projectId = projectId
this.backend = getBackend(this.projectId)
}
/**
* Set up the initial data structure for a given project
*/
async initialize() {
await this.backend.initialize(this.projectId)
}
/**
* Write a blob, if one does not already exist, with the given UTF-8 encoded
* string content.
*
* @param {string} string
* @return {Promise.<core.Blob>}
*/
async putString(string) {
assert.string(string, 'bad string')
const hash = blobHash.fromString(string)
const existingBlob = await this._findBlobBeforeInsert(hash)
if (existingBlob != null) {
return existingBlob
}
const newBlob = new Blob(hash, Buffer.byteLength(string), string.length)
// Note: the ReadableString is to work around a bug in the AWS SDK: it won't
// allow Body to be blank.
await uploadBlob(this.projectId, newBlob, new ReadableString(string))
await this.backend.insertBlob(this.projectId, newBlob)
return newBlob
}
/**
* Write a blob, if one does not already exist, with the given file (usually a
* temporary file).
*
* @param {string} pathname
* @return {Promise<core.Blob>}
*/
async putFile(pathname) {
assert.string(pathname, 'bad pathname')
const newBlob = await makeBlobForFile(pathname)
const existingBlob = await this._findBlobBeforeInsert(newBlob.getHash())
if (existingBlob != null) {
return existingBlob
}
const stringLength = await getStringLengthOfFile(
newBlob.getByteLength(),
pathname
)
newBlob.setStringLength(stringLength)
await this.putBlob(pathname, newBlob)
return newBlob
}
/**
   * Write a new blob. The stringLength must already have been set, and the
   * caller should have checked that the blob does not exist yet. Consider
   * using {@link putFile} instead of this lower-level method.
   *
   * @param {string} pathname
   * @param {core.Blob} finalizedBlob
   * @return {Promise<void>}
   */
  async putBlob(pathname, finalizedBlob) {
    await uploadBlob(
      this.projectId,
      finalizedBlob,
      fs.createReadStream(pathname)
    )
    await this.backend.insertBlob(this.projectId, finalizedBlob)
}
/**
* Stores an object as a JSON string in a blob.
*
* @param {object} obj
* @returns {Promise.<core.Blob>}
*/
async putObject(obj) {
assert.object(obj, 'bad object')
const string = JSON.stringify(obj)
return await this.putString(string)
}
/**
*
* Fetch a blob's content by its hash as a UTF-8 encoded string.
*
* @param {string} hash hexadecimal SHA-1 hash
* @return {Promise.<string>} promise for the content of the file
*/
async getString(hash) {
assert.blobHash(hash, 'bad hash')
const projectId = this.projectId
logger.debug({ projectId, hash }, 'getString started')
try {
const stream = await this.getStream(hash)
const buffer = await streams.readStreamToBuffer(stream)
return buffer.toString()
} finally {
logger.debug({ projectId, hash }, 'getString finished')
}
}
/**
* Fetch a JSON encoded blob by its hash and deserialize it.
*
* @template [T=unknown]
* @param {string} hash hexadecimal SHA-1 hash
* @return {Promise.<T>} promise for the content of the file
*/
async getObject(hash) {
assert.blobHash(hash, 'bad hash')
const projectId = this.projectId
logger.debug({ projectId, hash }, 'getObject started')
try {
const jsonString = await this.getString(hash)
const object = JSON.parse(jsonString)
return object
} catch (error) {
      // Maybe this blob is gzipped. Try to gunzip it.
// TODO: Remove once we've ensured this is not reached
const stream = await this.getStream(hash)
const buffer = await streams.gunzipStreamToBuffer(stream)
const object = JSON.parse(buffer.toString())
logger.warn('getObject: Gzipped object in BlobStore')
return object
} finally {
logger.debug({ projectId, hash }, 'getObject finished')
}
}
/**
* Fetch a blob by its hash as a stream.
*
* Note that, according to the AWS SDK docs, this does not retry after initial
* failure, so the caller must be prepared to retry on errors, if appropriate.
*
* @param {string} hash hexadecimal SHA-1 hash
* @param {Object} opts
* @return {Promise.<Readable>} a stream to read the file
*/
async getStream(hash, opts = {}) {
assert.blobHash(hash, 'bad hash')
const { bucket, key } = getBlobLocation(this.projectId, hash)
try {
const stream = await persistor.getObjectStream(bucket, key, opts)
return stream
} catch (err) {
if (err instanceof objectPersistor.Errors.NotFoundError) {
throw new Blob.NotFoundError(hash)
}
throw err
}
}
/**
* Read a blob metadata record by hexadecimal hash.
*
* @param {string} hash hexadecimal SHA-1 hash
* @return {Promise<core.Blob | null>}
*/
async getBlob(hash) {
assert.blobHash(hash, 'bad hash')
const globalBlob = GLOBAL_BLOBS.get(hash)
if (globalBlob != null) {
return globalBlob.blob
}
const blob = await this.backend.findBlob(this.projectId, hash)
return blob
}
async getBlobs(hashes) {
assert.array(hashes, 'bad hashes')
const nonGlobalHashes = []
const blobs = []
for (const hash of hashes) {
const globalBlob = GLOBAL_BLOBS.get(hash)
if (globalBlob != null) {
blobs.push(globalBlob.blob)
} else {
nonGlobalHashes.push(hash)
}
}
if (nonGlobalHashes.length === 0) {
return blobs // to avoid unnecessary database lookup
}
const projectBlobs = await this.backend.findBlobs(
this.projectId,
nonGlobalHashes
)
blobs.push(...projectBlobs)
return blobs
}
/**
* Retrieve all blobs associated with the project.
* @returns {Promise<core.Blob[]>} A promise that resolves to an array of blobs.
*/
async getProjectBlobs() {
const projectBlobs = await this.backend.getProjectBlobs(this.projectId)
return projectBlobs
}
/**
* Delete all blobs that belong to the project.
*/
async deleteBlobs() {
await Promise.all([
this.backend.deleteBlobs(this.projectId),
deleteBlobsInBucket(this.projectId),
])
}
async _findBlobBeforeInsert(hash) {
const globalBlob = GLOBAL_BLOBS.get(hash)
if (globalBlob != null && !globalBlob.demoted) {
return globalBlob.blob
}
const blob = await this.backend.findBlob(this.projectId, hash)
return blob
}
/**
* Copy an existing sourceBlob in this project to a target project.
* @param {Blob} sourceBlob
* @param {string} targetProjectId
* @return {Promise<void>}
*/
async copyBlob(sourceBlob, targetProjectId) {
assert.instance(sourceBlob, Blob, 'bad sourceBlob')
assert.projectId(targetProjectId, 'bad targetProjectId')
const hash = sourceBlob.getHash()
const sourceProjectId = this.projectId
const { bucket, key: sourceKey } = getBlobLocation(sourceProjectId, hash)
const destKey = makeProjectKey(targetProjectId, hash)
const targetBackend = getBackend(targetProjectId)
logger.debug({ sourceProjectId, targetProjectId, hash }, 'copyBlob started')
try {
await persistor.copyObject(bucket, sourceKey, destKey)
await targetBackend.insertBlob(targetProjectId, sourceBlob)
} finally {
logger.debug(
{ sourceProjectId, targetProjectId, hash },
'copyBlob finished'
)
}
}
}
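// Illustrative round trip (projectId is a placeholder):
//
//   const blobStore = new BlobStore(projectId)
//   await blobStore.initialize()
//   const blob = await blobStore.putString('hello world')
//   const text = await blobStore.getString(blob.getHash()) // 'hello world'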
module.exports = {
BlobStore,
getProjectBlobsBatch,
loadGlobalBlobs,
makeProjectKey,
makeBlobForFile,
getStringLengthOfFile,
GLOBAL_BLOBS,
}

View File

@@ -0,0 +1,437 @@
// @ts-check
/**
* Mongo backend for the blob store.
*
* Blobs are stored in the projectHistoryBlobs collection. Each project has a
* document in that collection. That document has a "blobs" subdocument whose
* fields are buckets of blobs. The key of a bucket is the first three hex
* digits of the blob hash. The value of the bucket is an array of blobs that
* match the key.
*
* Buckets have a maximum capacity of 8 blobs. When that capacity is exceeded,
* blobs are stored in a secondary collection: the projectHistoryShardedBlobs
* collection. This collection shards blobs between 16 documents per project.
* The shard key is the first hex digit of the hash. The documents are also
* organized in buckets, but the bucket key is made of hex digits 2, 3 and 4.
*/
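// Illustrative layout (hash and project id are placeholders): a blob whose
// hash starts with 'abc1' lives in bucket 'blobs.abc' of the project's
// projectHistoryBlobs document. Once that bucket holds 8 blobs, further
// records for the same bucket go to projectHistoryShardedBlobs under shard
// 'a', i.e. _id = Binary(<24-char project id hex> + '0a'), with bucket path
// 'blobs.bc1'.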
const { Blob } = require('overleaf-editor-core')
const { ObjectId, Binary, MongoError, ReadPreference } = require('mongodb')
const assert = require('../assert')
const mongodb = require('../mongodb')
const MAX_BLOBS_IN_BUCKET = 8
const DUPLICATE_KEY_ERROR_CODE = 11000
/**
* @typedef {import('mongodb').ReadPreferenceLike} ReadPreferenceLike
*/
/**
* Set up the data structures for a given project.
* @param {string} projectId
*/
async function initialize(projectId) {
assert.mongoId(projectId, 'bad projectId')
try {
await mongodb.blobs.insertOne({
_id: new ObjectId(projectId),
blobs: {},
})
} catch (err) {
if (err instanceof MongoError && err.code === DUPLICATE_KEY_ERROR_CODE) {
return // ignore already initialized case
}
throw err
}
}
/**
* Return blob metadata for the given project and hash.
* @param {string} projectId
* @param {string} hash
* @return {Promise<Blob | null>}
*/
async function findBlob(projectId, hash) {
assert.mongoId(projectId, 'bad projectId')
assert.blobHash(hash, 'bad hash')
const bucket = getBucket(hash)
const result = await mongodb.blobs.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { _id: 0, bucket: `$${bucket}` } }
)
if (result?.bucket == null) {
return null
}
const record = result.bucket.find(blob => blob.h.toString('hex') === hash)
if (record == null) {
if (result.bucket.length >= MAX_BLOBS_IN_BUCKET) {
return await findBlobSharded(projectId, hash)
} else {
return null
}
}
return recordToBlob(record)
}
/**
* Search in the sharded collection for blob metadata
* @param {string} projectId
* @param {string} hash
* @return {Promise<Blob | null>}
*/
async function findBlobSharded(projectId, hash) {
const [shard, bucket] = getShardedBucket(hash)
const id = makeShardedId(projectId, shard)
const result = await mongodb.shardedBlobs.findOne(
{ _id: id },
{ projection: { _id: 0, blobs: `$${bucket}` } }
)
if (result?.blobs == null) {
return null
}
const record = result.blobs.find(blob => blob.h.toString('hex') === hash)
if (!record) return null
return recordToBlob(record)
}
/**
* Read multiple blob metadata records by hexadecimal hashes.
* @param {string} projectId
* @param {Array<string>} hashes
* @return {Promise<Array<Blob>>}
*/
async function findBlobs(projectId, hashes) {
assert.mongoId(projectId, 'bad projectId')
assert.array(hashes, 'bad hashes: not array')
hashes.forEach(function (hash) {
assert.blobHash(hash, 'bad hash')
})
// Build a set of unique buckets
const buckets = new Set(hashes.map(getBucket))
// Get buckets from Mongo
const projection = { _id: 0 }
for (const bucket of buckets) {
projection[bucket] = 1
}
const result = await mongodb.blobs.findOne(
{ _id: new ObjectId(projectId) },
{ projection }
)
if (result?.blobs == null) {
return []
}
// Build blobs from the query results
const hashSet = new Set(hashes)
const blobs = []
for (const bucket of Object.values(result.blobs)) {
for (const record of bucket) {
const hash = record.h.toString('hex')
if (hashSet.has(hash)) {
blobs.push(recordToBlob(record))
hashSet.delete(hash)
}
}
}
// If we haven't found all the blobs, look in the sharded collection
if (hashSet.size > 0) {
const shardedBlobs = await findBlobsSharded(projectId, hashSet)
blobs.push(...shardedBlobs)
}
return blobs
}
/**
* Search in the sharded collection for blob metadata.
* @param {string} projectId
* @param {Set<string>} hashSet
* @return {Promise<Array<Blob>>}
*/
async function findBlobsSharded(projectId, hashSet) {
// Build a map of buckets by shard key
const bucketsByShard = new Map()
for (const hash of hashSet) {
const [shard, bucket] = getShardedBucket(hash)
let buckets = bucketsByShard.get(shard)
if (buckets == null) {
buckets = new Set()
bucketsByShard.set(shard, buckets)
}
buckets.add(bucket)
}
// Make parallel requests to the shards that might contain the hashes we want
const requests = []
for (const [shard, buckets] of bucketsByShard.entries()) {
const id = makeShardedId(projectId, shard)
const projection = { _id: 0 }
for (const bucket of buckets) {
projection[bucket] = 1
}
const request = mongodb.shardedBlobs.findOne({ _id: id }, { projection })
requests.push(request)
}
const results = await Promise.all(requests)
// Build blobs from the query results
const blobs = []
for (const result of results) {
if (result?.blobs == null) {
continue
}
for (const bucket of Object.values(result.blobs)) {
for (const record of bucket) {
const hash = record.h.toString('hex')
if (hashSet.has(hash)) {
blobs.push(recordToBlob(record))
}
}
}
}
return blobs
}
/**
* Return metadata for all blobs in the given project
*/
async function getProjectBlobs(projectId) {
assert.mongoId(projectId, 'bad projectId')
const result = await mongodb.blobs.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { _id: 0 } }
)
if (!result) {
return []
}
// Build blobs from the query results
const blobs = []
for (const bucket of Object.values(result.blobs)) {
for (const record of bucket) {
blobs.push(recordToBlob(record))
}
}
// Look for all possible sharded blobs
const minShardedId = makeShardedId(projectId, '0')
const maxShardedId = makeShardedId(projectId, 'f')
// @ts-ignore We are using a custom _id here.
const shardedRecords = mongodb.shardedBlobs.find(
{
_id: { $gte: minShardedId, $lte: maxShardedId },
},
{ projection: { _id: 0 } }
)
for await (const shardedRecord of shardedRecords) {
if (shardedRecord.blobs == null) {
continue
}
for (const bucket of Object.values(shardedRecord.blobs)) {
for (const record of bucket) {
blobs.push(recordToBlob(record))
}
}
}
return blobs
}
/**
* Return metadata for all blobs in the given project
* @param {Array<string>} projectIds
* @return {Promise<{ nBlobs: number, blobs: Map<string, Array<Blob>> }>}
*/
async function getProjectBlobsBatch(projectIds) {
for (const project of projectIds) {
assert.mongoId(project, 'bad projectId')
}
let nBlobs = 0
const blobs = new Map()
if (projectIds.length === 0) return { nBlobs, blobs }
// blobs
{
const cursor = await mongodb.blobs.find(
{ _id: { $in: projectIds.map(projectId => new ObjectId(projectId)) } },
{ readPreference: ReadPreference.secondaryPreferred }
)
for await (const record of cursor) {
const projectBlobs = Object.values(record.blobs).flat().map(recordToBlob)
blobs.set(record._id.toString(), projectBlobs)
nBlobs += projectBlobs.length
}
}
// sharded blobs
{
// @ts-ignore We are using a custom _id here.
const cursor = await mongodb.shardedBlobs.find(
{
_id: {
$gte: makeShardedId(projectIds[0], '0'),
$lte: makeShardedId(projectIds[projectIds.length - 1], 'f'),
},
},
{ readPreference: ReadPreference.secondaryPreferred }
)
for await (const record of cursor) {
const recordIdHex = record._id.toString('hex')
const recordProjectId = recordIdHex.slice(0, 24)
const projectBlobs = Object.values(record.blobs).flat().map(recordToBlob)
const found = blobs.get(recordProjectId)
if (found) {
found.push(...projectBlobs)
} else {
blobs.set(recordProjectId, projectBlobs)
}
nBlobs += projectBlobs.length
}
}
return { nBlobs, blobs }
}
/**
* Add a blob's metadata to the blobs collection after it has been uploaded.
* @param {string} projectId
* @param {Blob} blob
*/
async function insertBlob(projectId, blob) {
assert.mongoId(projectId, 'bad projectId')
const hash = blob.getHash()
const bucket = getBucket(hash)
const record = blobToRecord(blob)
const result = await mongodb.blobs.updateOne(
{
_id: new ObjectId(projectId),
$expr: {
$lt: [{ $size: { $ifNull: [`$${bucket}`, []] } }, MAX_BLOBS_IN_BUCKET],
},
},
{
$addToSet: { [bucket]: record },
}
)
if (result.matchedCount === 0) {
await insertRecordSharded(projectId, hash, record)
}
}
/**
* Add a blob's metadata to the sharded blobs collection.
* @param {string} projectId
* @param {string} hash
* @param {Record} record
* @return {Promise<void>}
*/
async function insertRecordSharded(projectId, hash, record) {
const [shard, bucket] = getShardedBucket(hash)
const id = makeShardedId(projectId, shard)
await mongodb.shardedBlobs.updateOne(
{ _id: id },
{ $addToSet: { [bucket]: record } },
{ upsert: true }
)
}
/**
* Delete all blobs for a given project.
* @param {string} projectId
*/
async function deleteBlobs(projectId) {
assert.mongoId(projectId, 'bad projectId')
await mongodb.blobs.deleteOne({ _id: new ObjectId(projectId) })
const minShardedId = makeShardedId(projectId, '0')
const maxShardedId = makeShardedId(projectId, 'f')
await mongodb.shardedBlobs.deleteMany({
// @ts-ignore We are using a custom _id here.
_id: { $gte: minShardedId, $lte: maxShardedId },
})
}
/**
* Return the Mongo path to the bucket for the given hash.
* @param {string} hash
* @return {string}
*/
function getBucket(hash) {
return `blobs.${hash.slice(0, 3)}`
}
/**
* Return the shard key and Mongo path to the bucket for the given hash in the
* sharded collection.
* @param {string} hash
* @return {[string, string]}
*/
function getShardedBucket(hash) {
const shard = hash.slice(0, 1)
const bucket = `blobs.${hash.slice(1, 4)}`
return [shard, bucket]
}
/**
* Create an _id key for the sharded collection.
* @param {string} projectId
* @param {string} shard
* @return {Binary}
*/
function makeShardedId(projectId, shard) {
return new Binary(Buffer.from(`${projectId}0${shard}`, 'hex'))
}
/**
* @typedef {Object} Record
* @property {Binary} h
* @property {number} b
* @property {number} [s]
*/
/**
* Return the Mongo record for the given blob.
* @param {Blob} blob
* @return {Record}
*/
function blobToRecord(blob) {
const hash = blob.getHash()
const byteLength = blob.getByteLength()
const stringLength = blob.getStringLength()
return {
h: new Binary(Buffer.from(hash, 'hex')),
b: byteLength,
s: stringLength,
}
}
/**
* Create a blob from the given Mongo record.
* @param {Record} record
* @return {Blob}
*/
function recordToBlob(record) {
return new Blob(record.h.toString('hex'), record.b, record.s)
}
module.exports = {
initialize,
findBlob,
findBlobs,
getProjectBlobs,
getProjectBlobsBatch,
insertBlob,
deleteBlobs,
}

View File

@@ -0,0 +1,161 @@
const { Blob } = require('overleaf-editor-core')
const assert = require('../assert')
const knex = require('../knex')
/**
* Set up the initial data structures for a project
*/
async function initialize(projectId) {
// Nothing to do for Postgres
}
/**
* Return blob metadata for the given project and hash
*/
async function findBlob(projectId, hash) {
assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
assert.blobHash(hash, 'bad hash')
const binaryHash = hashToBuffer(hash)
const record = await knex('project_blobs')
.select('hash_bytes', 'byte_length', 'string_length')
.where({
project_id: projectId,
hash_bytes: binaryHash,
})
.first()
return recordToBlob(record)
}
/**
* Read multiple blob metadata records by hexadecimal hashes.
*
* @param {Array.<string>} hashes hexadecimal SHA-1 hashes
* @return {Promise.<Array.<Blob?>>} no guarantee on order
*/
async function findBlobs(projectId, hashes) {
assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
assert.array(hashes, 'bad hashes: not array')
hashes.forEach(function (hash) {
assert.blobHash(hash, 'bad hash')
})
const binaryHashes = hashes.map(hashToBuffer)
const records = await knex('project_blobs')
.select('hash_bytes', 'byte_length', 'string_length')
.where('project_id', projectId)
.whereIn('hash_bytes', binaryHashes)
const blobs = records.map(recordToBlob)
return blobs
}
/**
* Return metadata for all blobs in the given project
*/
async function getProjectBlobs(projectId) {
assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
const records = await knex('project_blobs')
.select('hash_bytes', 'byte_length', 'string_length')
.where({
project_id: projectId,
})
const blobs = records.map(recordToBlob)
return blobs
}
/**
* Return metadata for all blobs in the given project
* @param {Array<number>} projectIds
* @return {Promise<{ nBlobs: number, blobs: Map<number, Array<Blob>> }>}
*/
async function getProjectBlobsBatch(projectIds) {
for (const projectId of projectIds) {
assert.integer(projectId, 'bad projectId')
}
let nBlobs = 0
const blobs = new Map()
if (projectIds.length === 0) return { nBlobs, blobs }
const cursor = knex('project_blobs')
.select('project_id', 'hash_bytes', 'byte_length', 'string_length')
.whereIn('project_id', projectIds)
.stream()
for await (const record of cursor) {
const found = blobs.get(record.project_id)
if (found) {
found.push(recordToBlob(record))
} else {
blobs.set(record.project_id, [recordToBlob(record)])
}
nBlobs++
}
return { nBlobs, blobs }
}
/**
* Add a blob's metadata to the blobs table after it has been uploaded.
*/
async function insertBlob(projectId, blob) {
assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
await knex('project_blobs')
.insert(blobToRecord(projectId, blob))
.onConflict(['project_id', 'hash_bytes'])
.ignore()
}
/**
* Deletes all blobs for a given project
*/
async function deleteBlobs(projectId) {
assert.postgresId(projectId, 'bad projectId')
projectId = parseInt(projectId, 10)
await knex('project_blobs').where('project_id', projectId).delete()
}
function blobToRecord(projectId, blob) {
return {
project_id: projectId,
hash_bytes: hashToBuffer(blob.hash),
byte_length: blob.getByteLength(),
string_length: blob.getStringLength(),
}
}
function recordToBlob(record) {
if (!record) return
return new Blob(
hashFromBuffer(record.hash_bytes),
record.byte_length,
record.string_length
)
}
function hashToBuffer(hash) {
if (!hash) return
return Buffer.from(hash, 'hex')
}
function hashFromBuffer(buffer) {
if (!buffer) return
return buffer.toString('hex')
}
module.exports = {
initialize,
findBlob,
findBlobs,
getProjectBlobs,
getProjectBlobsBatch,
insertBlob,
deleteBlobs,
}

View File

@@ -0,0 +1,40 @@
'use strict'
/**
* @module storage/lib/chunk_buffer
*/
const chunkStore = require('../chunk_store')
const redisBackend = require('../chunk_store/redis')
const metrics = require('@overleaf/metrics')
/**
* Load the latest Chunk stored for a project, including blob metadata.
*
* @param {string} projectId
* @return {Promise.<Chunk>}
*/
async function loadLatest(projectId) {
const cachedChunk = await redisBackend.getCurrentChunk(projectId)
const chunkRecord = await chunkStore.loadLatestRaw(projectId)
const cachedChunkIsValid = redisBackend.checkCacheValidityWithMetadata(
cachedChunk,
chunkRecord
)
if (cachedChunkIsValid) {
metrics.inc('chunk_buffer.loadLatest', 1, {
status: 'cache-hit',
})
return cachedChunk
} else {
metrics.inc('chunk_buffer.loadLatest', 1, {
status: 'cache-miss',
})
const chunk = await chunkStore.loadLatest(projectId)
await redisBackend.setCurrentChunk(projectId, chunk)
return chunk
}
}
module.exports = {
loadLatest,
}

View File

@@ -0,0 +1,7 @@
const OError = require('@overleaf/o-error')
class ChunkVersionConflictError extends OError {}
module.exports = {
ChunkVersionConflictError,
}

View File

@@ -0,0 +1,447 @@
// @ts-check
'use strict'
/**
* Manage {@link Chunk} and {@link History} storage.
*
* For storage, chunks are immutable. If we want to update a project with new
* changes, we create a new chunk record and History object and delete the old
* ones. If we compact a project's history, we similarly destroy the old chunk
* (or chunks) and replace them with a new one. This is helpful when using S3,
* because it guarantees only eventual consistency for updates but provides
* stronger consistency guarantees for object creation.
*
* When a chunk record in the database is removed, we save its ID for later
* in the `old_chunks` table, rather than deleting it immediately. This lets us
* use batch deletion to reduce the number of delete requests to S3.
*
* The chunk store also caches data about which blobs are referenced by each
* chunk, which allows us to find unused blobs without loading all of the data
* for all projects from S3. Whenever we create a chunk, we also insert records
* into the `chunk_blobs` table, to help with this bookkeeping.
*/
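// Illustrative lifecycle, assuming this module is required as `chunkStore`
// (oldEndVersion and the construction of newChunk are placeholders):
//
//   const projectId = await chunkStore.initializeProject() // stores chunk 0
//   const chunk = await chunkStore.loadLatest(projectId)
//   // ...append changes to build newChunk...
//   await chunkStore.update(projectId, oldEndVersion, newChunk)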
const config = require('config')
const OError = require('@overleaf/o-error')
const { Chunk, History, Snapshot } = require('overleaf-editor-core')
const assert = require('../assert')
const BatchBlobStore = require('../batch_blob_store')
const { BlobStore } = require('../blob_store')
const { historyStore } = require('../history_store')
const mongoBackend = require('./mongo')
const postgresBackend = require('./postgres')
const { ChunkVersionConflictError } = require('./errors')
const DEFAULT_DELETE_BATCH_SIZE = parseInt(config.get('maxDeleteKeys'), 10)
const DEFAULT_DELETE_TIMEOUT_SECS = 3000 // 50 minutes
const DEFAULT_DELETE_MIN_AGE_SECS = 86400 // 1 day
/**
* Create the initial chunk for a project.
*/
async function initializeProject(projectId, snapshot) {
if (projectId != null) {
assert.projectId(projectId, 'bad projectId')
} else {
projectId = await postgresBackend.generateProjectId()
}
if (snapshot != null) {
assert.instance(snapshot, Snapshot, 'bad snapshot')
} else {
snapshot = new Snapshot()
}
const blobStore = new BlobStore(projectId)
await blobStore.initialize()
const backend = getBackend(projectId)
const chunkRecord = await backend.getLatestChunk(projectId)
if (chunkRecord != null) {
throw new AlreadyInitialized(projectId)
}
const history = new History(snapshot, [])
const chunk = new Chunk(history, 0)
await create(projectId, chunk)
return projectId
}
/**
* Load the blobs referenced in the given history
*/
async function lazyLoadHistoryFiles(history, batchBlobStore) {
const blobHashes = new Set()
history.findBlobHashes(blobHashes)
await batchBlobStore.preload(Array.from(blobHashes))
await history.loadFiles('lazy', batchBlobStore)
}
/**
* Load the latest Chunk stored for a project, including blob metadata.
*
* @param {string} projectId
* @param {Object} [opts]
* @param {boolean} [opts.readOnly]
* @return {Promise<{id: string, startVersion: number, endVersion: number, endTimestamp: Date}>}
*/
async function loadLatestRaw(projectId, opts) {
assert.projectId(projectId, 'bad projectId')
const backend = getBackend(projectId)
const chunkRecord = await backend.getLatestChunk(projectId, opts)
if (chunkRecord == null) {
throw new Chunk.NotFoundError(projectId)
}
return chunkRecord
}
/**
* Load the latest Chunk stored for a project, including blob metadata.
*
* @param {string} projectId
* @return {Promise.<Chunk>}
*/
async function loadLatest(projectId) {
const chunkRecord = await loadLatestRaw(projectId)
const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
const history = History.fromRaw(rawHistory)
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
await lazyLoadHistoryFiles(history, batchBlobStore)
return new Chunk(history, chunkRecord.startVersion)
}
/**
 * Load the chunk that contains the given version, including blob metadata.
*/
async function loadAtVersion(projectId, version) {
assert.projectId(projectId, 'bad projectId')
assert.integer(version, 'bad version')
const backend = getBackend(projectId)
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
const chunkRecord = await backend.getChunkForVersion(projectId, version)
const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
const history = History.fromRaw(rawHistory)
await lazyLoadHistoryFiles(history, batchBlobStore)
return new Chunk(history, chunkRecord.endVersion - history.countChanges())
}
/**
* Load the chunk that contains the version that was current at the given
* timestamp, including blob metadata.
*/
async function loadAtTimestamp(projectId, timestamp) {
assert.projectId(projectId, 'bad projectId')
assert.date(timestamp, 'bad timestamp')
const backend = getBackend(projectId)
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
const chunkRecord = await backend.getChunkForTimestamp(projectId, timestamp)
const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
const history = History.fromRaw(rawHistory)
await lazyLoadHistoryFiles(history, batchBlobStore)
return new Chunk(history, chunkRecord.endVersion - history.countChanges())
}
/**
* Store the chunk and insert corresponding records in the database.
*
* @param {string} projectId
* @param {Chunk} chunk
* @param {Date} [earliestChangeTimestamp]
*/
async function create(projectId, chunk, earliestChangeTimestamp) {
assert.projectId(projectId, 'bad projectId')
assert.instance(chunk, Chunk, 'bad chunk')
assert.maybe.date(earliestChangeTimestamp, 'bad timestamp')
const backend = getBackend(projectId)
const chunkStart = chunk.getStartVersion()
const chunkId = await uploadChunk(projectId, chunk)
const opts = {}
if (chunkStart > 0) {
opts.oldChunkId = await getChunkIdForVersion(projectId, chunkStart - 1)
}
if (earliestChangeTimestamp != null) {
opts.earliestChangeTimestamp = earliestChangeTimestamp
}
await backend.confirmCreate(projectId, chunk, chunkId, opts)
}
/**
* Upload the given chunk to object storage.
*
* This is used by the create and update methods.
*/
async function uploadChunk(projectId, chunk) {
const backend = getBackend(projectId)
const blobStore = new BlobStore(projectId)
const historyStoreConcurrency = parseInt(
config.get('chunkStore.historyStoreConcurrency'),
10
)
const rawHistory = await chunk
.getHistory()
.store(blobStore, historyStoreConcurrency)
const chunkId = await backend.insertPendingChunk(projectId, chunk)
await historyStore.storeRaw(projectId, chunkId, rawHistory)
return chunkId
}
/**
* Extend the project's history by replacing the latest chunk with a new
* chunk.
*
* @param {string} projectId
* @param {number} oldEndVersion
* @param {Chunk} newChunk
* @param {Date} [earliestChangeTimestamp]
* @return {Promise}
*/
async function update(
projectId,
oldEndVersion,
newChunk,
earliestChangeTimestamp
) {
assert.projectId(projectId, 'bad projectId')
assert.integer(oldEndVersion, 'bad oldEndVersion')
assert.instance(newChunk, Chunk, 'bad newChunk')
assert.maybe.date(earliestChangeTimestamp, 'bad timestamp')
const backend = getBackend(projectId)
const oldChunkId = await getChunkIdForVersion(projectId, oldEndVersion)
const newChunkId = await uploadChunk(projectId, newChunk)
const opts = {}
if (earliestChangeTimestamp != null) {
opts.earliestChangeTimestamp = earliestChangeTimestamp
}
await backend.confirmUpdate(projectId, oldChunkId, newChunk, newChunkId, opts)
}
/**
* Find the chunk ID for a given version of a project.
*
* @param {string} projectId
* @param {number} version
* @return {Promise.<string>}
*/
async function getChunkIdForVersion(projectId, version) {
const backend = getBackend(projectId)
const chunkRecord = await backend.getChunkForVersion(projectId, version)
return chunkRecord.id
}
/**
* Find the chunk metadata for a given version of a project.
*
* @param {string} projectId
* @param {number} version
* @return {Promise.<{id: string|number, startVersion: number, endVersion: number}>}
*/
async function getChunkMetadataForVersion(projectId, version) {
const backend = getBackend(projectId)
const chunkRecord = await backend.getChunkForVersion(projectId, version)
return chunkRecord
}
/**
* Get all of a project's chunk ids
*/
async function getProjectChunkIds(projectId) {
const backend = getBackend(projectId)
const chunkIds = await backend.getProjectChunkIds(projectId)
return chunkIds
}
/**
 * Get all of a project's chunks directly
 */
async function getProjectChunks(projectId) {
  const backend = getBackend(projectId)
  const chunks = await backend.getProjectChunks(projectId)
  return chunks
}
/**
* Load the chunk for a given chunk record, including blob metadata.
*/
async function loadByChunkRecord(projectId, chunkRecord) {
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
const { raw: rawHistory, buffer: chunkBuffer } =
await historyStore.loadRawWithBuffer(projectId, chunkRecord.id)
const history = History.fromRaw(rawHistory)
await lazyLoadHistoryFiles(history, batchBlobStore)
return {
chunk: new Chunk(history, chunkRecord.endVersion - history.countChanges()),
chunkBuffer,
}
}
/**
* Asynchronously retrieves project chunks starting from a specific version.
*
* This generator function yields chunk records for a given project starting from the specified version (inclusive).
* It continues to fetch and yield subsequent chunk records until the end version of the latest chunk metadata is reached.
* If you want to fetch all the chunks *after* a version V, call this function with V+1.
*
* @param {string} projectId - The ID of the project.
* @param {number} version - The starting version to retrieve chunks from.
* @returns {AsyncGenerator<Object, void, undefined>} An async generator that yields chunk records.
*/
async function* getProjectChunksFromVersion(projectId, version) {
const backend = getBackend(projectId)
const latestChunkMetadata = await loadLatestRaw(projectId)
if (!latestChunkMetadata || version > latestChunkMetadata.endVersion) {
return
}
let chunkRecord = await backend.getChunkForVersion(projectId, version)
while (chunkRecord != null) {
yield chunkRecord
if (chunkRecord.endVersion >= latestChunkMetadata.endVersion) {
break
} else {
chunkRecord = await backend.getChunkForVersion(
projectId,
chunkRecord.endVersion + 1
)
}
}
}
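// Illustrative iteration (projectId and lastBackedUpVersion are placeholders):
// to process every chunk after version lastBackedUpVersion, start at + 1:
//
//   for await (const chunkRecord of getProjectChunksFromVersion(
//     projectId,
//     lastBackedUpVersion + 1
//   )) {
//     // chunkRecord carries { id, startVersion, endVersion, endTimestamp }
//   }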
/**
* Delete the given chunk from the database.
*
* This doesn't delete the chunk from object storage yet. The old chunks
* collection will do that.
*/
async function destroy(projectId, chunkId) {
const backend = getBackend(projectId)
await backend.deleteChunk(projectId, chunkId)
}
/**
* Delete all of a project's chunks from the database.
*/
async function deleteProjectChunks(projectId) {
const backend = getBackend(projectId)
await backend.deleteProjectChunks(projectId)
}
/**
* Delete a given number of old chunks from both the database
* and from object storage.
*
* @param {object} options
* @param {number} [options.batchSize] - number of chunks to delete in each
* batch
* @param {number} [options.maxBatches] - maximum number of batches to process
* @param {number} [options.minAgeSecs] - minimum age of chunks to delete
* @param {number} [options.timeout] - maximum time to spend deleting chunks
*
* @return {Promise<number>} number of chunks deleted
*/
async function deleteOldChunks(options = {}) {
const batchSize = options.batchSize ?? DEFAULT_DELETE_BATCH_SIZE
const maxBatches = options.maxBatches ?? Number.MAX_SAFE_INTEGER
const minAgeSecs = options.minAgeSecs ?? DEFAULT_DELETE_MIN_AGE_SECS
const timeout = options.timeout ?? DEFAULT_DELETE_TIMEOUT_SECS
assert.greater(batchSize, 0)
assert.greater(timeout, 0)
assert.greater(maxBatches, 0)
assert.greaterOrEqual(minAgeSecs, 0)
const timeoutAfter = Date.now() + timeout * 1000
let deletedChunksTotal = 0
for (const backend of [postgresBackend, mongoBackend]) {
for (let i = 0; i < maxBatches; i++) {
if (Date.now() > timeoutAfter) {
break
}
const deletedChunks = await deleteOldChunksBatch(
backend,
batchSize,
minAgeSecs
)
deletedChunksTotal += deletedChunks.length
if (deletedChunks.length !== batchSize) {
// Last batch was incomplete. There probably are no old chunks left
break
}
}
}
return deletedChunksTotal
}
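// Illustrative call (values are arbitrary): delete old chunks in batches of
// the configured size, at most 5 batches, only touching chunks older than a
// day, and stop after roughly 60 seconds:
//
//   await deleteOldChunks({ maxBatches: 5, minAgeSecs: 86400, timeout: 60 })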
async function deleteOldChunksBatch(backend, count, minAgeSecs) {
assert.greater(count, 0, 'bad count')
assert.greaterOrEqual(minAgeSecs, 0, 'bad minAgeSecs')
const oldChunks = await backend.getOldChunksBatch(count, minAgeSecs)
if (oldChunks.length === 0) {
return []
}
await historyStore.deleteChunks(oldChunks)
await backend.deleteOldChunks(oldChunks.map(chunk => chunk.chunkId))
return oldChunks
}
/**
* Returns the appropriate backend for the given project id
*
* Numeric ids use the Postgres backend.
* Strings of 24 characters use the Mongo backend.
*/
function getBackend(projectId) {
if (assert.POSTGRES_ID_REGEXP.test(projectId)) {
return postgresBackend
} else if (assert.MONGO_ID_REGEXP.test(projectId)) {
return mongoBackend
} else {
throw new OError('bad project id', { projectId })
}
}
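// Example (illustrative; the exact id formats are defined by
// assert.POSTGRES_ID_REGEXP and assert.MONGO_ID_REGEXP): a numeric id resolves
// to the Postgres backend, a 24-character hex id resolves to the Mongo
// backend, anything else throws.
//
//   getBackend('123')                      // => postgresBackend
//   getBackend('507f1f77bcf86cd799439011') // => mongoBackend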
class AlreadyInitialized extends OError {
constructor(projectId) {
super('Project is already initialized', { projectId })
}
}
module.exports = {
getBackend,
initializeProject,
loadLatest,
loadLatestRaw,
loadAtVersion,
loadAtTimestamp,
loadByChunkRecord,
create,
update,
destroy,
getChunkIdForVersion,
getChunkMetadataForVersion,
getProjectChunkIds,
getProjectChunks,
getProjectChunksFromVersion,
deleteProjectChunks,
deleteOldChunks,
AlreadyInitialized,
ChunkVersionConflictError,
}

View File

@@ -0,0 +1,526 @@
// @ts-check
const { ObjectId, ReadPreference, MongoError } = require('mongodb')
const { Chunk } = require('overleaf-editor-core')
const OError = require('@overleaf/o-error')
const assert = require('../assert')
const mongodb = require('../mongodb')
const { ChunkVersionConflictError } = require('./errors')
const DUPLICATE_KEY_ERROR_CODE = 11000
/**
* @import { ClientSession } from 'mongodb'
*/
/**
* Get the latest chunk's metadata from the database
* @param {string} projectId
* @param {Object} [opts]
* @param {boolean} [opts.readOnly]
*/
async function getLatestChunk(projectId, opts = {}) {
assert.mongoId(projectId, 'bad projectId')
const { readOnly = false } = opts
const record = await mongodb.chunks.findOne(
{
projectId: new ObjectId(projectId),
state: { $in: ['active', 'closed'] },
},
{
sort: { startVersion: -1 },
readPreference: readOnly
? ReadPreference.secondaryPreferred
: ReadPreference.primary,
}
)
if (record == null) {
return null
}
return chunkFromRecord(record)
}
/**
* Get the metadata for the chunk that contains the given version.
*/
async function getChunkForVersion(projectId, version) {
assert.mongoId(projectId, 'bad projectId')
assert.integer(version, 'bad version')
const record = await mongodb.chunks.findOne(
{
projectId: new ObjectId(projectId),
state: { $in: ['active', 'closed'] },
startVersion: { $lte: version },
endVersion: { $gte: version },
},
{ sort: { startVersion: 1 } }
)
if (record == null) {
throw new Chunk.VersionNotFoundError(projectId, version)
}
return chunkFromRecord(record)
}
/**
 * Get the metadata for the project's first chunk (start version 0) whose end
 * timestamp is at or before the given timestamp, falling back to deleted chunks.
*/
async function getFirstChunkBeforeTimestamp(projectId, timestamp) {
assert.mongoId(projectId, 'bad projectId')
assert.date(timestamp, 'bad timestamp')
const recordActive = await getChunkForVersion(projectId, 0)
if (recordActive && recordActive.endTimestamp <= timestamp) {
return recordActive
}
// fallback to deleted chunk
const recordDeleted = await mongodb.chunks.findOne(
{
projectId: new ObjectId(projectId),
state: 'deleted',
startVersion: 0,
updatedAt: { $lte: timestamp }, // indexed for state=deleted
endTimestamp: { $lte: timestamp },
},
{ sort: { updatedAt: -1 } }
)
if (recordDeleted) {
return chunkFromRecord(recordDeleted)
}
throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
}
/**
* Get the metadata for the chunk that contains the version that was current at
* the given timestamp.
*/
async function getChunkForTimestamp(projectId, timestamp) {
assert.mongoId(projectId, 'bad projectId')
assert.date(timestamp, 'bad timestamp')
const record = await mongodb.chunks.findOne(
{
projectId: new ObjectId(projectId),
state: { $in: ['active', 'closed'] },
endTimestamp: { $gte: timestamp },
},
// We use the index on the startVersion for sorting records. This assumes
// that timestamps go up with each version.
{ sort: { startVersion: 1 } }
)
if (record == null) {
// Couldn't find a chunk that had modifications after the given timestamp.
// Fetch the latest chunk instead.
const chunk = await getLatestChunk(projectId)
if (chunk == null) {
throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
}
return chunk
}
return chunkFromRecord(record)
}
/**
* Get the metadata for the chunk that contains the version that was current before
* the given timestamp.
*/
async function getLastActiveChunkBeforeTimestamp(projectId, timestamp) {
assert.mongoId(projectId, 'bad projectId')
assert.date(timestamp, 'bad timestamp')
const record = await mongodb.chunks.findOne(
{
projectId: new ObjectId(projectId),
state: { $in: ['active', 'closed'] },
$or: [
{
endTimestamp: {
$lte: timestamp,
},
},
{
endTimestamp: null,
},
],
},
// We use the index on the startVersion for sorting records. This assumes
// that timestamps go up with each version.
{ sort: { startVersion: -1 } }
)
if (record == null) {
throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
}
return chunkFromRecord(record)
}
/**
* Get all of a project's chunk ids
*/
async function getProjectChunkIds(projectId) {
assert.mongoId(projectId, 'bad projectId')
const cursor = mongodb.chunks.find(
{
projectId: new ObjectId(projectId),
state: { $in: ['active', 'closed'] },
},
{ projection: { _id: 1 } }
)
return await cursor.map(record => record._id).toArray()
}
/**
 * Get all of a project's chunks directly
*/
async function getProjectChunks(projectId) {
assert.mongoId(projectId, 'bad projectId')
const cursor = mongodb.chunks
.find(
{
projectId: new ObjectId(projectId),
state: { $in: ['active', 'closed'] },
},
{ projection: { state: 0 } }
)
.sort({ startVersion: 1 })
return await cursor.map(chunkFromRecord).toArray()
}
/**
* Insert a pending chunk before sending it to object storage.
*/
async function insertPendingChunk(projectId, chunk) {
assert.mongoId(projectId, 'bad projectId')
assert.instance(chunk, Chunk, 'bad chunk')
const chunkId = new ObjectId()
await mongodb.chunks.insertOne({
_id: chunkId,
projectId: new ObjectId(projectId),
startVersion: chunk.getStartVersion(),
endVersion: chunk.getEndVersion(),
endTimestamp: chunk.getEndTimestamp(),
state: 'pending',
updatedAt: new Date(),
})
return chunkId.toString()
}
/**
* Record that a new chunk was created.
*
* @param {string} projectId
* @param {Chunk} chunk
* @param {string} chunkId
* @param {object} opts
* @param {Date} [opts.earliestChangeTimestamp]
* @param {string} [opts.oldChunkId]
*/
async function confirmCreate(projectId, chunk, chunkId, opts = {}) {
assert.mongoId(projectId, 'bad projectId')
assert.instance(chunk, Chunk, 'bad newChunk')
assert.mongoId(chunkId, 'bad newChunkId')
await mongodb.client.withSession(async session => {
await session.withTransaction(async () => {
if (opts.oldChunkId != null) {
await closeChunk(projectId, opts.oldChunkId, { session })
}
await activateChunk(projectId, chunkId, { session })
await updateProjectRecord(
projectId,
chunk,
opts.earliestChangeTimestamp,
{ session }
)
})
})
}
/**
* Write the metadata to the project record
*/
async function updateProjectRecord(
projectId,
chunk,
earliestChangeTimestamp,
mongoOpts = {}
) {
// record the end version against the project
await mongodb.projects.updateOne(
{
'overleaf.history.id': projectId, // string for Object ids, number for postgres ids
},
{
// always store the latest end version and timestamp for the chunk
$max: {
'overleaf.history.currentEndVersion': chunk.getEndVersion(),
'overleaf.history.currentEndTimestamp': chunk.getEndTimestamp(),
'overleaf.history.updatedAt': new Date(),
},
// store the first pending change timestamp for the chunk, this will
// be cleared every time a backup is completed.
$min: {
'overleaf.backup.pendingChangeAt':
earliestChangeTimestamp || chunk.getEndTimestamp() || new Date(),
},
},
mongoOpts
)
}
/**
* Record that a chunk was replaced by a new one.
*
* @param {string} projectId
* @param {string} oldChunkId
* @param {Chunk} newChunk
* @param {string} newChunkId
* @param {object} [opts]
* @param {Date} [opts.earliestChangeTimestamp]
*/
async function confirmUpdate(
projectId,
oldChunkId,
newChunk,
newChunkId,
opts = {}
) {
assert.mongoId(projectId, 'bad projectId')
assert.mongoId(oldChunkId, 'bad oldChunkId')
assert.instance(newChunk, Chunk, 'bad newChunk')
assert.mongoId(newChunkId, 'bad newChunkId')
await mongodb.client.withSession(async session => {
await session.withTransaction(async () => {
await deleteActiveChunk(projectId, oldChunkId, { session })
await activateChunk(projectId, newChunkId, { session })
await updateProjectRecord(
projectId,
newChunk,
opts.earliestChangeTimestamp,
{ session }
)
})
})
}
/**
* Activate a pending chunk
*
* @param {string} projectId
* @param {string} chunkId
* @param {object} [opts]
* @param {ClientSession} [opts.session]
*/
async function activateChunk(projectId, chunkId, opts = {}) {
assert.mongoId(projectId, 'bad projectId')
assert.mongoId(chunkId, 'bad chunkId')
let result
try {
result = await mongodb.chunks.updateOne(
{
_id: new ObjectId(chunkId),
projectId: new ObjectId(projectId),
state: 'pending',
},
{ $set: { state: 'active', updatedAt: new Date() } },
opts
)
} catch (err) {
if (err instanceof MongoError && err.code === DUPLICATE_KEY_ERROR_CODE) {
throw new ChunkVersionConflictError('chunk start version is not unique', {
projectId,
chunkId,
})
} else {
throw err
}
}
if (result.matchedCount === 0) {
throw new OError('pending chunk not found', { projectId, chunkId })
}
}
/**
* Close a chunk
*
* A closed chunk is one that can't be extended anymore.
*
* @param {string} projectId
* @param {string} chunkId
* @param {object} [opts]
* @param {ClientSession} [opts.session]
*/
async function closeChunk(projectId, chunkId, opts = {}) {
const result = await mongodb.chunks.updateOne(
{
_id: new ObjectId(chunkId),
projectId: new ObjectId(projectId),
state: 'active',
},
{ $set: { state: 'closed' } },
opts
)
if (result.matchedCount === 0) {
throw new ChunkVersionConflictError('unable to close chunk', {
projectId,
chunkId,
})
}
}
/**
* Delete an active chunk
*
* This is used to delete chunks that are in the process of being extended. It
* will refuse to delete chunks that are already closed and can therefore not be
* extended.
*
* @param {string} projectId
* @param {string} chunkId
* @param {object} [opts]
* @param {ClientSession} [opts.session]
*/
async function deleteActiveChunk(projectId, chunkId, opts = {}) {
const updateResult = await mongodb.chunks.updateOne(
{
_id: new ObjectId(chunkId),
projectId: new ObjectId(projectId),
state: 'active',
},
{ $set: { state: 'deleted', updatedAt: new Date() } },
opts
)
if (updateResult.matchedCount === 0) {
throw new ChunkVersionConflictError('unable to delete active chunk', {
projectId,
chunkId,
})
}
}
/**
* Delete a chunk.
*
* @param {string} projectId
* @param {string} chunkId
* @return {Promise}
*/
async function deleteChunk(projectId, chunkId, mongoOpts = {}) {
assert.mongoId(projectId, 'bad projectId')
assert.mongoId(chunkId, 'bad chunkId')
await mongodb.chunks.updateOne(
{ _id: new ObjectId(chunkId), projectId: new ObjectId(projectId) },
{ $set: { state: 'deleted', updatedAt: new Date() } },
mongoOpts
)
}
/**
* Delete all of a project's chunks
*/
async function deleteProjectChunks(projectId) {
assert.mongoId(projectId, 'bad projectId')
await mongodb.chunks.updateMany(
{
projectId: new ObjectId(projectId),
state: { $in: ['active', 'closed'] },
},
{ $set: { state: 'deleted', updatedAt: new Date() } }
)
}
/**
* Get a batch of old chunks for deletion
*/
async function getOldChunksBatch(count, minAgeSecs) {
const maxUpdatedAt = new Date(Date.now() - minAgeSecs * 1000)
const batch = []
// We need to fetch one state at a time to take advantage of the partial
// indexes on the chunks collection.
//
// Mongo 6.0 allows partial indexes that use the $in operator. When we reach
// that Mongo version, we can create a partial index on both the deleted and
// pending states and simplify this logic a bit.
for (const state of ['deleted', 'pending']) {
if (count === 0) {
// There's no more space in the batch
break
}
const cursor = mongodb.chunks
.find(
{ state, updatedAt: { $lt: maxUpdatedAt } },
{
limit: count,
projection: { _id: 1, projectId: 1 },
}
)
.map(record => ({
chunkId: record._id.toString(),
projectId: record.projectId.toString(),
}))
for await (const record of cursor) {
batch.push(record)
count -= 1
}
}
return batch
}
/**
* Delete a batch of old chunks from the database
*/
async function deleteOldChunks(chunkIds) {
await mongodb.chunks.deleteMany({
_id: { $in: chunkIds.map(id => new ObjectId(id)) },
state: { $in: ['deleted', 'pending'] },
})
}
/**
* Build a chunk metadata object from the database record
*/
function chunkFromRecord(record) {
return {
id: record._id.toString(),
startVersion: record.startVersion,
endVersion: record.endVersion,
endTimestamp: record.endTimestamp,
}
}
module.exports = {
getLatestChunk,
getFirstChunkBeforeTimestamp,
getLastActiveChunkBeforeTimestamp,
getChunkForVersion,
getChunkForTimestamp,
getProjectChunkIds,
getProjectChunks,
insertPendingChunk,
confirmCreate,
confirmUpdate,
updateProjectRecord,
deleteChunk,
deleteProjectChunks,
getOldChunksBatch,
deleteOldChunks,
}
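// Example (illustrative sketch of the usual write protocol, assuming the
// chunk's history is uploaded to object storage between the two calls):
//
//   const chunkId = await insertPendingChunk(projectId, chunk)
//   // ... upload the chunk's history to the history store here ...
//   await confirmCreate(projectId, chunk, chunkId, { oldChunkId })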

View File

@@ -0,0 +1,487 @@
// @ts-check
const { Chunk } = require('overleaf-editor-core')
const assert = require('../assert')
const knex = require('../knex')
const knexReadOnly = require('../knex_read_only')
const { ChunkVersionConflictError } = require('./errors')
const { updateProjectRecord } = require('./mongo')
const DUPLICATE_KEY_ERROR_CODE = '23505'
/**
* @import { Knex } from 'knex'
*/
/**
* Get the latest chunk's metadata from the database
* @param {string} projectId
* @param {Object} [opts]
* @param {boolean} [opts.readOnly]
*/
async function getLatestChunk(projectId, opts = {}) {
assert.postgresId(projectId, 'bad projectId')
const { readOnly = false } = opts
const record = await (readOnly ? knexReadOnly : knex)('chunks')
.where('doc_id', parseInt(projectId, 10))
.orderBy('end_version', 'desc')
.first()
if (record == null) {
return null
}
return chunkFromRecord(record)
}
/**
* Get the metadata for the chunk that contains the given version.
*
* @param {string} projectId
* @param {number} version
*/
async function getChunkForVersion(projectId, version) {
assert.postgresId(projectId, 'bad projectId')
const record = await knex('chunks')
.where('doc_id', parseInt(projectId, 10))
.where('end_version', '>=', version)
.orderBy('end_version')
.first()
if (!record) {
throw new Chunk.VersionNotFoundError(projectId, version)
}
return chunkFromRecord(record)
}
/**
 * Get the metadata for the project's first chunk (start version 0) whose end
 * timestamp is at or before the given timestamp.
*
* @param {string} projectId
* @param {Date} timestamp
*/
async function getFirstChunkBeforeTimestamp(projectId, timestamp) {
assert.date(timestamp, 'bad timestamp')
const recordActive = await getChunkForVersion(projectId, 0)
// projectId must be valid if getChunkForVersion did not throw
if (recordActive && recordActive.endTimestamp <= timestamp) {
return recordActive
}
// fallback to deleted chunk
const recordDeleted = await knex('old_chunks')
.where('doc_id', parseInt(projectId, 10))
.where('start_version', '=', 0)
.where('end_timestamp', '<=', timestamp)
.orderBy('end_version', 'desc')
.first()
if (recordDeleted) {
return chunkFromRecord(recordDeleted)
}
throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
}
/**
 * Get the metadata for the chunk that contains the version that was current
 * before the given timestamp.
*
* @param {string} projectId
* @param {Date} timestamp
*/
async function getLastActiveChunkBeforeTimestamp(projectId, timestamp) {
assert.date(timestamp, 'bad timestamp')
assert.postgresId(projectId, 'bad projectId')
const query = knex('chunks')
.where('doc_id', parseInt(projectId, 10))
.where(function () {
this.where('end_timestamp', '<=', timestamp).orWhere(
'end_timestamp',
null
)
})
.orderBy('end_version', 'desc', 'last')
const record = await query.first()
if (!record) {
throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
}
return chunkFromRecord(record)
}
/**
* Get the metadata for the chunk that contains the version that was current at
* the given timestamp.
*
* @param {string} projectId
* @param {Date} timestamp
*/
async function getChunkForTimestamp(projectId, timestamp) {
assert.postgresId(projectId, 'bad projectId')
  // This query finds the earliest chunk whose end timestamp is at or after the
  // given timestamp, OR falls back to the latest chunk for the project. The
  // fallback covers the case where the timestamp is ahead of every chunk's end
  // timestamp, which on its own would return no results.
const whereAfterEndTimestampOrLatestChunk = knex.raw(
'end_timestamp >= ? ' +
'OR id = ( ' +
'SELECT id FROM chunks ' +
'WHERE doc_id = ? ' +
'ORDER BY end_version desc LIMIT 1' +
')',
[timestamp, parseInt(projectId, 10)]
)
const record = await knex('chunks')
.where('doc_id', parseInt(projectId, 10))
.where(whereAfterEndTimestampOrLatestChunk)
.orderBy('end_version')
.first()
if (!record) {
throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
}
return chunkFromRecord(record)
}
/**
* Build a chunk metadata object from the database record
*/
function chunkFromRecord(record) {
return {
id: record.id.toString(),
startVersion: record.start_version,
endVersion: record.end_version,
endTimestamp: record.end_timestamp,
}
}
/**
* Get all of a project's chunk ids
*
* @param {string} projectId
*/
async function getProjectChunkIds(projectId) {
assert.postgresId(projectId, 'bad projectId')
const records = await knex('chunks')
.select('id')
.where('doc_id', parseInt(projectId, 10))
return records.map(record => record.id)
}
/**
 * Get all of a project's chunks directly
*
* @param {string} projectId
*/
async function getProjectChunks(projectId) {
assert.postgresId(projectId, 'bad projectId')
const records = await knex('chunks')
.select()
.where('doc_id', parseInt(projectId, 10))
.orderBy('end_version')
return records.map(chunkFromRecord)
}
/**
* Insert a pending chunk before sending it to object storage.
*
* @param {string} projectId
* @param {Chunk} chunk
*/
async function insertPendingChunk(projectId, chunk) {
assert.postgresId(projectId, 'bad projectId')
const result = await knex.first(
knex.raw("nextval('chunks_id_seq'::regclass)::integer as chunkid")
)
const chunkId = result.chunkid
await knex('pending_chunks').insert({
id: chunkId,
doc_id: parseInt(projectId, 10),
end_version: chunk.getEndVersion(),
start_version: chunk.getStartVersion(),
end_timestamp: chunk.getEndTimestamp(),
})
return chunkId.toString()
}
/**
* Record that a new chunk was created.
*
* @param {string} projectId
* @param {Chunk} chunk
* @param {string} chunkId
* @param {object} opts
* @param {Date} [opts.earliestChangeTimestamp]
* @param {string} [opts.oldChunkId]
*/
async function confirmCreate(projectId, chunk, chunkId, opts = {}) {
assert.postgresId(projectId, 'bad projectId')
await knex.transaction(async tx => {
if (opts.oldChunkId != null) {
await _assertChunkIsNotClosed(tx, projectId, opts.oldChunkId)
await _closeChunk(tx, projectId, opts.oldChunkId)
}
await Promise.all([
_deletePendingChunk(tx, projectId, chunkId),
_insertChunk(tx, projectId, chunk, chunkId),
])
await updateProjectRecord(
// The history id in Mongo is an integer for Postgres projects
parseInt(projectId, 10),
chunk,
opts.earliestChangeTimestamp
)
})
}
/**
* Record that a chunk was replaced by a new one.
*
* @param {string} projectId
* @param {string} oldChunkId
* @param {Chunk} newChunk
* @param {string} newChunkId
*/
async function confirmUpdate(
projectId,
oldChunkId,
newChunk,
newChunkId,
opts = {}
) {
assert.postgresId(projectId, 'bad projectId')
await knex.transaction(async tx => {
await _assertChunkIsNotClosed(tx, projectId, oldChunkId)
await _deleteChunks(tx, { doc_id: projectId, id: oldChunkId })
await Promise.all([
_deletePendingChunk(tx, projectId, newChunkId),
_insertChunk(tx, projectId, newChunk, newChunkId),
])
await updateProjectRecord(
// The history id in Mongo is an integer for Postgres projects
parseInt(projectId, 10),
newChunk,
opts.earliestChangeTimestamp
)
})
}
/**
* Delete a pending chunk
*
* @param {Knex} tx
* @param {string} projectId
* @param {string} chunkId
*/
async function _deletePendingChunk(tx, projectId, chunkId) {
await tx('pending_chunks')
.where({
doc_id: parseInt(projectId, 10),
id: parseInt(chunkId, 10),
})
.del()
}
/**
* Adds an active chunk
*
* @param {Knex} tx
* @param {string} projectId
* @param {Chunk} chunk
* @param {string} chunkId
*/
async function _insertChunk(tx, projectId, chunk, chunkId) {
const startVersion = chunk.getStartVersion()
const endVersion = chunk.getEndVersion()
try {
await tx('chunks').insert({
id: parseInt(chunkId, 10),
doc_id: parseInt(projectId, 10),
start_version: startVersion,
end_version: endVersion,
end_timestamp: chunk.getEndTimestamp(),
})
} catch (err) {
if (
err instanceof Error &&
'code' in err &&
err.code === DUPLICATE_KEY_ERROR_CODE
) {
throw new ChunkVersionConflictError(
'chunk start or end version is not unique',
{ projectId, chunkId, startVersion, endVersion }
)
}
throw err
}
}
/**
* Check that a chunk is not closed
*
* This is used to synchronize chunk creations and extensions.
*
* @param {Knex} tx
* @param {string} projectId
* @param {string} chunkId
*/
async function _assertChunkIsNotClosed(tx, projectId, chunkId) {
const record = await tx('chunks')
.forUpdate()
.select('closed')
.where('doc_id', parseInt(projectId, 10))
.where('id', parseInt(chunkId, 10))
.first()
if (!record) {
throw new ChunkVersionConflictError('unable to close chunk: not found', {
projectId,
chunkId,
})
}
if (record.closed) {
throw new ChunkVersionConflictError(
'unable to close chunk: already closed',
{
projectId,
chunkId,
}
)
}
}
/**
* Close a chunk
*
* A closed chunk can no longer be extended.
*
* @param {Knex} tx
* @param {string} projectId
* @param {string} chunkId
*/
async function _closeChunk(tx, projectId, chunkId) {
await tx('chunks')
.update({ closed: true })
.where('doc_id', parseInt(projectId, 10))
.where('id', parseInt(chunkId, 10))
}
/**
* Delete a chunk.
*
* @param {string} projectId
* @param {string} chunkId
*/
async function deleteChunk(projectId, chunkId) {
assert.postgresId(projectId, 'bad projectId')
assert.integer(chunkId, 'bad chunkId')
await _deleteChunks(knex, {
doc_id: parseInt(projectId, 10),
id: parseInt(chunkId, 10),
})
}
/**
* Delete all of a project's chunks
*
* @param {string} projectId
*/
async function deleteProjectChunks(projectId) {
assert.postgresId(projectId, 'bad projectId')
await knex.transaction(async tx => {
await _deleteChunks(knex, { doc_id: parseInt(projectId, 10) })
})
}
/**
* Delete many chunks
*
* @param {Knex} tx
* @param {any} whereClause
*/
async function _deleteChunks(tx, whereClause) {
const rows = await tx('chunks').where(whereClause).del().returning('*')
if (rows.length === 0) {
return
}
const oldChunks = rows.map(row => ({
doc_id: row.doc_id,
chunk_id: row.id,
start_version: row.start_version,
end_version: row.end_version,
end_timestamp: row.end_timestamp,
deleted_at: tx.fn.now(),
}))
await tx('old_chunks').insert(oldChunks)
}
/**
* Get a batch of old chunks for deletion
*
* @param {number} count
* @param {number} minAgeSecs
*/
async function getOldChunksBatch(count, minAgeSecs) {
const maxDeletedAt = new Date(Date.now() - minAgeSecs * 1000)
const records = await knex('old_chunks')
.whereNull('deleted_at')
.orWhere('deleted_at', '<', maxDeletedAt)
.orderBy('chunk_id')
.limit(count)
return records.map(oldChunk => ({
projectId: oldChunk.doc_id.toString(),
chunkId: oldChunk.chunk_id.toString(),
}))
}
/**
* Delete a batch of old chunks from the database
*
* @param {string[]} chunkIds
*/
async function deleteOldChunks(chunkIds) {
await knex('old_chunks')
.whereIn(
'chunk_id',
chunkIds.map(id => parseInt(id, 10))
)
.del()
}
/**
* Generate a new project id
*/
async function generateProjectId() {
const record = await knex.first(
knex.raw("nextval('docs_id_seq'::regclass)::integer as doc_id")
)
return record.doc_id.toString()
}
module.exports = {
getLatestChunk,
getFirstChunkBeforeTimestamp,
getLastActiveChunkBeforeTimestamp,
getChunkForVersion,
getChunkForTimestamp,
getProjectChunkIds,
getProjectChunks,
insertPendingChunk,
confirmCreate,
confirmUpdate,
deleteChunk,
deleteProjectChunks,
getOldChunksBatch,
deleteOldChunks,
generateProjectId,
}

View File

@@ -0,0 +1,254 @@
const metrics = require('@overleaf/metrics')
const logger = require('@overleaf/logger')
const redis = require('../redis')
const rclient = redis.rclientHistory //
const { Snapshot, Change, History, Chunk } = require('overleaf-editor-core')
const TEMPORARY_CACHE_LIFETIME = 300 // 5 minutes
const keySchema = {
snapshot({ projectId }) {
return `snapshot:{${projectId}}`
},
startVersion({ projectId }) {
return `snapshot-version:{${projectId}}`
},
changes({ projectId }) {
return `changes:{${projectId}}`
},
}
rclient.defineCommand('get_current_chunk', {
numberOfKeys: 3,
lua: `
local startVersionValue = redis.call('GET', KEYS[2])
if not startVersionValue then
return nil -- this is a cache-miss
end
local snapshotValue = redis.call('GET', KEYS[1])
local changesValues = redis.call('LRANGE', KEYS[3], 0, -1)
return {snapshotValue, startVersionValue, changesValues}
`,
})
/**
* Retrieves the current chunk of project history from Redis storage
* @param {string} projectId - The unique identifier of the project
* @returns {Promise<Chunk|null>} A Promise that resolves to a Chunk object containing project history,
* or null if retrieval fails
* @throws {Error} If Redis operations fail
*/
async function getCurrentChunk(projectId) {
try {
const result = await rclient.get_current_chunk(
keySchema.snapshot({ projectId }),
keySchema.startVersion({ projectId }),
keySchema.changes({ projectId })
)
if (!result) {
return null // cache-miss
}
const snapshot = Snapshot.fromRaw(JSON.parse(result[0]))
const startVersion = JSON.parse(result[1])
const changes = result[2].map(c => Change.fromRaw(JSON.parse(c)))
const history = new History(snapshot, changes)
const chunk = new Chunk(history, startVersion)
metrics.inc('chunk_store.redis.get_current_chunk', 1, { status: 'success' })
return chunk
} catch (err) {
logger.error({ err, projectId }, 'error getting current chunk from redis')
metrics.inc('chunk_store.redis.get_current_chunk', 1, { status: 'error' })
return null
}
}
rclient.defineCommand('get_current_chunk_metadata', {
numberOfKeys: 2,
lua: `
local startVersionValue = redis.call('GET', KEYS[1])
local changesCount = redis.call('LLEN', KEYS[2])
return {startVersionValue, changesCount}
`,
})
/**
* Retrieves the current chunk metadata for a given project from Redis
* @param {string} projectId - The ID of the project to get metadata for
* @returns {Promise<Object|null>} Object containing startVersion and changesCount if found, null on error or cache miss
* @property {number} startVersion - The starting version information
* @property {number} changesCount - The number of changes in the chunk
*/
async function getCurrentChunkMetadata(projectId) {
try {
const result = await rclient.get_current_chunk_metadata(
keySchema.startVersion({ projectId }),
keySchema.changes({ projectId })
)
if (!result) {
return null // cache-miss
}
const startVersion = JSON.parse(result[0])
const changesCount = parseInt(result[1], 10)
return { startVersion, changesCount }
} catch (err) {
return null
}
}
rclient.defineCommand('set_current_chunk', {
numberOfKeys: 3,
lua: `
local snapshotValue = ARGV[1]
local startVersionValue = ARGV[2]
redis.call('SETEX', KEYS[1], ${TEMPORARY_CACHE_LIFETIME}, snapshotValue)
redis.call('SETEX', KEYS[2], ${TEMPORARY_CACHE_LIFETIME}, startVersionValue)
redis.call('DEL', KEYS[3]) -- clear the old changes list
if #ARGV >= 3 then
redis.call('RPUSH', KEYS[3], unpack(ARGV, 3))
redis.call('EXPIRE', KEYS[3], ${TEMPORARY_CACHE_LIFETIME})
end
`,
})
/**
* Stores the current chunk of project history in Redis
* @param {string} projectId - The ID of the project
* @param {Chunk} chunk - The chunk object containing history data
* @returns {Promise<*>} Returns the result of the Redis operation, or null if an error occurs
* @throws {Error} May throw Redis-related errors which are caught internally
*/
async function setCurrentChunk(projectId, chunk) {
try {
const snapshotKey = keySchema.snapshot({ projectId })
const startVersionKey = keySchema.startVersion({ projectId })
const changesKey = keySchema.changes({ projectId })
const snapshot = chunk.history.snapshot
const startVersion = chunk.startVersion
const changes = chunk.history.changes
await rclient.set_current_chunk(
snapshotKey,
startVersionKey,
changesKey,
JSON.stringify(snapshot.toRaw()),
startVersion,
...changes.map(c => JSON.stringify(c.toRaw()))
)
metrics.inc('chunk_store.redis.set_current_chunk', 1, { status: 'success' })
} catch (err) {
logger.error(
{ err, projectId, chunk },
      'error setting current chunk in redis'
)
metrics.inc('chunk_store.redis.set_current_chunk', 1, { status: 'error' })
return null // while testing we will suppress any errors
}
}
/**
* Checks whether a cached chunk's version metadata matches the current chunk's metadata
* @param {Chunk} cachedChunk - The chunk retrieved from cache
* @param {Chunk} currentChunk - The current chunk to compare against
* @returns {boolean} - Returns true if the chunks have matching start and end versions, false otherwise
*/
function checkCacheValidity(cachedChunk, currentChunk) {
return Boolean(
cachedChunk &&
cachedChunk.getStartVersion() === currentChunk.getStartVersion() &&
cachedChunk.getEndVersion() === currentChunk.getEndVersion()
)
}
/**
* Validates if a cached chunk matches the current chunk metadata by comparing versions
* @param {Object} cachedChunk - The cached chunk object to validate
* @param {Object} currentChunkMetadata - The current chunk metadata to compare against
* @param {number} currentChunkMetadata.startVersion - The starting version number
* @param {number} currentChunkMetadata.endVersion - The ending version number
* @returns {boolean} - True if the cached chunk is valid, false otherwise
*/
function checkCacheValidityWithMetadata(cachedChunk, currentChunkMetadata) {
return Boolean(
cachedChunk &&
cachedChunk.getStartVersion() === currentChunkMetadata.startVersion &&
cachedChunk.getEndVersion() === currentChunkMetadata.endVersion
)
}
/**
* Compares two chunks for equality using stringified JSON comparison
* @param {string} projectId - The ID of the project
* @param {Chunk} cachedChunk - The cached chunk to compare
* @param {Chunk} currentChunk - The current chunk to compare against
* @returns {boolean} - Returns false if either chunk is null/undefined, otherwise returns the comparison result
*/
function compareChunks(projectId, cachedChunk, currentChunk) {
if (!cachedChunk || !currentChunk) {
return false
}
const identical = JSON.stringify(cachedChunk) === JSON.stringify(currentChunk)
if (!identical) {
try {
logger.error(
{
projectId,
cachedChunkStartVersion: cachedChunk.getStartVersion(),
cachedChunkEndVersion: cachedChunk.getEndVersion(),
currentChunkStartVersion: currentChunk.getStartVersion(),
currentChunkEndVersion: currentChunk.getEndVersion(),
},
'chunk cache mismatch'
)
} catch (err) {
// ignore errors while logging
}
}
metrics.inc('chunk_store.redis.compare_chunks', 1, {
status: identical ? 'success' : 'fail',
})
return identical
}
// Define Lua script for atomic cache clearing
rclient.defineCommand('clear_chunk_cache', {
numberOfKeys: 3,
lua: `
-- Delete all keys related to a project's chunk cache atomically
redis.call('DEL', KEYS[1]) -- snapshot key
redis.call('DEL', KEYS[2]) -- startVersion key
redis.call('DEL', KEYS[3]) -- changes key
return 1
`,
})
/**
* Clears all cache entries for a project's chunk data
* @param {string} projectId - The ID of the project whose cache should be cleared
* @returns {Promise<boolean>} A promise that resolves to true if successful, false on error
*/
async function clearCache(projectId) {
try {
const snapshotKey = keySchema.snapshot({ projectId })
const startVersionKey = keySchema.startVersion({ projectId })
const changesKey = keySchema.changes({ projectId })
await rclient.clear_chunk_cache(snapshotKey, startVersionKey, changesKey)
metrics.inc('chunk_store.redis.clear_cache', 1, { status: 'success' })
return true
} catch (err) {
logger.error({ err, projectId }, 'error clearing chunk cache from redis')
metrics.inc('chunk_store.redis.clear_cache', 1, { status: 'error' })
return false
}
}
module.exports = {
getCurrentChunk,
setCurrentChunk,
getCurrentChunkMetadata,
checkCacheValidity,
checkCacheValidityWithMetadata,
compareChunks,
clearCache,
}
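// Example (illustrative sketch; `chunk` is assumed to be a Chunk loaded from
// the chunk store): populate the cache, read it back, and check that the
// cached copy still matches before using it.
//
//   await setCurrentChunk(projectId, chunk)
//   const cached = await getCurrentChunk(projectId)
//   if (checkCacheValidity(cached, chunk)) {
//     // safe to serve the cached chunk
//   }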

View File

@@ -0,0 +1,18 @@
// @ts-check
const { createHash } = require('node:crypto')
/**
* Compute a SHA-1 hash of the content
*
* This is used to validate incoming updates.
*
* @param {string} content
*/
function getContentHash(content) {
const hash = createHash('sha-1')
hash.update(content)
return hash.digest('hex')
}
module.exports = { getContentHash }
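// Example (illustrative): the result is the hex-encoded SHA-1 digest of the
// content.
//
//   getContentHash('hello world')
//   // => '2aae6c35c94fcfb415dbe95f408b9ce91ee846ed'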

View File

@@ -0,0 +1,5 @@
const OError = require('@overleaf/o-error')
class InvalidChangeError extends OError {}
module.exports = { InvalidChangeError }

View File

@@ -0,0 +1,30 @@
const Blob = require('overleaf-editor-core').Blob
const blobHash = require('./blob_hash')
const BPromise = require('bluebird')
// We want to simulate applying all of the operations so we can return the
// resulting hashes to the caller for them to check. To do this, we need to be
// able to take the lazy files in the final snapshot, fetch their content, and
// compute the new content hashes. We don't, however, need to actually store
// that content; we just need to get the hash.
function HashCheckBlobStore(realBlobStore) {
this.realBlobStore = realBlobStore
}
HashCheckBlobStore.prototype.getString = BPromise.method(
function hashCheckBlobStoreGetString(hash) {
return this.realBlobStore.getString(hash)
}
)
HashCheckBlobStore.prototype.putString = BPromise.method(
function hashCheckBlobStorePutString(string) {
return new Blob(
blobHash.fromString(string),
Buffer.byteLength(string),
string.length
)
}
)
module.exports = HashCheckBlobStore
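// Example (illustrative sketch; BlobStore and projectId are assumed to come
// from ./blob_store): wrap a real blob store so that putString only computes
// the would-be blob hash without persisting any content.
//
//   const hashCheckBlobStore = new HashCheckBlobStore(new BlobStore(projectId))
//   const blob = await hashCheckBlobStore.putString('new file content')
//   // blob.getHash() is returned to the caller for verification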

View File

@@ -0,0 +1,202 @@
// @ts-check
'use strict'
const core = require('overleaf-editor-core')
const config = require('config')
const path = require('node:path')
const Stream = require('node:stream')
const { promisify } = require('node:util')
const zlib = require('node:zlib')
const OError = require('@overleaf/o-error')
const objectPersistor = require('@overleaf/object-persistor')
const logger = require('@overleaf/logger')
const assert = require('./assert')
const persistor = require('./persistor')
const projectKey = require('./project_key')
const streams = require('./streams')
const Chunk = core.Chunk
const gzip = promisify(zlib.gzip)
const gunzip = promisify(zlib.gunzip)
class LoadError extends OError {
/**
* @param {string} projectId
* @param {string} chunkId
* @param {any} cause
*/
constructor(projectId, chunkId, cause) {
super(
'HistoryStore: failed to load chunk history',
{ projectId, chunkId },
cause
)
this.projectId = projectId
this.chunkId = chunkId
}
}
class StoreError extends OError {
/**
* @param {string} projectId
* @param {string} chunkId
* @param {any} cause
*/
constructor(projectId, chunkId, cause) {
super(
'HistoryStore: failed to store chunk history',
{ projectId, chunkId },
cause
)
this.projectId = projectId
this.chunkId = chunkId
}
}
/**
* @param {string} projectId
* @param {string} chunkId
* @return {string}
*/
function getKey(projectId, chunkId) {
return path.join(projectKey.format(projectId), projectKey.pad(chunkId))
}
/**
 * Store and retrieve raw {@link History} objects from the bucket. Mainly used via the
* {@link ChunkStore}.
*
* Histories are stored as gzipped JSON blobs, keyed on the project ID and the
* ID of the Chunk that owns the history. The project ID is currently redundant,
* but I think it might help in future if we have to shard on project ID, and
* it gives us some chance of reconstructing histories even if there is a
* problem with the chunk metadata in the database.
*
* @class
*/
class HistoryStore {
#persistor
#bucket
constructor(persistor, bucket) {
this.#persistor = persistor
this.#bucket = bucket
}
/**
* Load the raw object for a History.
*
* @param {string} projectId
* @param {string} chunkId
* @return {Promise<import('overleaf-editor-core/lib/types').RawHistory>}
*/
async loadRaw(projectId, chunkId) {
assert.projectId(projectId, 'bad projectId')
assert.chunkId(chunkId, 'bad chunkId')
const key = getKey(projectId, chunkId)
logger.debug({ projectId, chunkId }, 'loadRaw started')
try {
const buf = await streams.gunzipStreamToBuffer(
await this.#persistor.getObjectStream(this.#bucket, key)
)
return JSON.parse(buf.toString('utf-8'))
} catch (err) {
if (err instanceof objectPersistor.Errors.NotFoundError) {
throw new Chunk.NotPersistedError(projectId)
}
throw new LoadError(projectId, chunkId, err)
} finally {
logger.debug({ projectId, chunkId }, 'loadRaw finished')
}
}
async loadRawWithBuffer(projectId, chunkId) {
assert.projectId(projectId, 'bad projectId')
assert.chunkId(chunkId, 'bad chunkId')
const key = getKey(projectId, chunkId)
logger.debug({ projectId, chunkId }, 'loadBuffer started')
try {
const buf = await streams.readStreamToBuffer(
await this.#persistor.getObjectStream(this.#bucket, key)
)
const unzipped = await gunzip(buf)
return {
buffer: buf,
raw: JSON.parse(unzipped.toString('utf-8')),
}
} catch (err) {
if (err instanceof objectPersistor.Errors.NotFoundError) {
throw new Chunk.NotPersistedError(projectId)
}
throw new LoadError(projectId, chunkId, err)
} finally {
logger.debug({ projectId, chunkId }, 'loadBuffer finished')
}
}
/**
* Compress and store a {@link History}.
*
* @param {string} projectId
* @param {string} chunkId
* @param {import('overleaf-editor-core/lib/types').RawHistory} rawHistory
*/
async storeRaw(projectId, chunkId, rawHistory) {
assert.projectId(projectId, 'bad projectId')
assert.chunkId(chunkId, 'bad chunkId')
assert.object(rawHistory, 'bad rawHistory')
const key = getKey(projectId, chunkId)
logger.debug({ projectId, chunkId }, 'storeRaw started')
const buf = await gzip(JSON.stringify(rawHistory))
try {
await this.#persistor.sendStream(
this.#bucket,
key,
Stream.Readable.from([buf]),
{
contentType: 'application/json',
contentEncoding: 'gzip',
contentLength: buf.byteLength,
}
)
} catch (err) {
throw new StoreError(projectId, chunkId, err)
} finally {
logger.debug({ projectId, chunkId }, 'storeRaw finished')
}
}
/**
* Delete multiple chunks from bucket. Expects an Array of objects with
* projectId and chunkId properties
* @param {Array<{projectId: string,chunkId:string}>} chunks
*/
async deleteChunks(chunks) {
logger.debug({ chunks }, 'deleteChunks started')
try {
await Promise.all(
chunks.map(chunk => {
const key = getKey(chunk.projectId, chunk.chunkId)
return this.#persistor.deleteObject(this.#bucket, key)
})
)
} finally {
logger.debug({ chunks }, 'deleteChunks finished')
}
}
}
module.exports = {
HistoryStore,
historyStore: new HistoryStore(persistor, config.get('chunkStore.bucket')),
}
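// Example (illustrative sketch, assuming valid project and chunk ids and a
// History object from overleaf-editor-core): store a raw history and load it
// back via the default `historyStore` instance.
//
//   await historyStore.storeRaw(projectId, chunkId, history.toRaw())
//   const rawHistory = await historyStore.loadRaw(projectId, chunkId)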

View File

@@ -0,0 +1,8 @@
// @ts-check
'use strict'
const env = process.env.NODE_ENV || 'development'
const knexfile = require('../../knexfile')
module.exports = require('knex').default(knexfile[env])

View File

@@ -0,0 +1,19 @@
'use strict'
const config = require('config')
const knexfile = require('../../knexfile')
const env = process.env.NODE_ENV || 'development'
if (config.databaseUrlReadOnly) {
module.exports = require('knex')({
...knexfile[env],
pool: {
...knexfile[env].pool,
min: 0,
},
connection: config.databaseUrlReadOnly,
})
} else {
module.exports = require('./knex')
}

View File

@@ -0,0 +1,30 @@
const Metrics = require('@overleaf/metrics')
const config = require('config')
const { MongoClient } = require('mongodb')
const client = new MongoClient(config.mongo.uri)
const db = client.db()
const chunks = db.collection('projectHistoryChunks')
const blobs = db.collection('projectHistoryBlobs')
const globalBlobs = db.collection('projectHistoryGlobalBlobs')
const shardedBlobs = db.collection('projectHistoryShardedBlobs')
const projects = db.collection('projects')
// Temporary collection for tracking progress of backed up old blobs (without a hash).
// The initial sync process will be able to skip over these.
// Schema: _id: projectId, blobs: [Binary]
const backedUpBlobs = db.collection('projectHistoryBackedUpBlobs')
Metrics.mongodb.monitor(client)
module.exports = {
client,
db,
chunks,
blobs,
globalBlobs,
projects,
shardedBlobs,
backedUpBlobs,
}

View File

@@ -0,0 +1,261 @@
// @ts-check
'use strict'
const _ = require('lodash')
const logger = require('@overleaf/logger')
const core = require('overleaf-editor-core')
const Chunk = core.Chunk
const History = core.History
const assert = require('./assert')
const chunkStore = require('./chunk_store')
const { BlobStore } = require('./blob_store')
const { InvalidChangeError } = require('./errors')
const { getContentHash } = require('./content_hash')
function countChangeBytes(change) {
// Note: This is not quite accurate, because the raw change may contain raw
// file info (or conceivably even content) that will not be included in the
// actual stored object.
return Buffer.byteLength(JSON.stringify(change.toRaw()))
}
function totalChangeBytes(changes) {
return changes.length ? _(changes).map(countChangeBytes).sum() : 0
}
// provide a simple timer function
function Timer() {
this.t0 = process.hrtime()
}
Timer.prototype.elapsed = function () {
const dt = process.hrtime(this.t0)
const timeInMilliseconds = (dt[0] + dt[1] * 1e-9) * 1e3
return timeInMilliseconds
}
/**
* Break the given set of changes into zero or more Chunks according to the
* provided limits and store them.
*
* Some other possible improvements:
* 1. This does a lot more JSON serialization than it has to. We may know the
* JSON for the changes before we call this function, so we could in that
* case get the byte size of each change without doing any work. Even if we
* don't know it initially, we could save some computation by caching this
* info rather than recomputing it many times. TBD whether it is worthwhile.
* 2. We don't necessarily have to fetch the latest chunk in order to determine
* that it is full. We could store this in the chunk metadata record. It may
* be worth distinguishing between a Chunk and its metadata record. The
* endVersion may be better suited to the metadata record.
*
* @param {string} projectId
* @param {core.Change[]} allChanges
* @param {Object} limits
* @param {number} clientEndVersion
* @return {Promise.<Object?>}
*/
async function persistChanges(projectId, allChanges, limits, clientEndVersion) {
assert.projectId(projectId)
assert.array(allChanges)
assert.maybe.object(limits)
assert.integer(clientEndVersion)
const blobStore = new BlobStore(projectId)
const earliestChangeTimestamp =
allChanges.length > 0 ? allChanges[0].getTimestamp() : null
let currentChunk
/**
* currentSnapshot tracks the latest change that we're applying; we use it to
* check that the changes we are persisting are valid.
*
* @type {core.Snapshot}
*/
let currentSnapshot
let originalEndVersion
let changesToPersist
limits = limits || {}
_.defaults(limits, {
changeBucketMinutes: 60,
maxChanges: 2500,
maxChangeBytes: 5 * 1024 * 1024,
maxChunkChanges: 2000,
maxChunkChangeBytes: 5 * 1024 * 1024,
maxChunkChangeTime: 5000, // warn if total time for changes in a chunk takes longer than this
})
function checkElapsedTime(timer) {
const timeTaken = timer.elapsed()
if (timeTaken > limits.maxChunkChangeTime) {
console.log('warning: slow chunk', projectId, timeTaken)
}
}
/**
* Add changes to a chunk until the chunk is full
*
* The chunk is full if it reaches a certain number of changes or a certain
* size in bytes
*
* @param {core.Chunk} chunk
* @param {core.Change[]} changes
*/
async function fillChunk(chunk, changes) {
let totalBytes = totalChangeBytes(chunk.getChanges())
let changesPushed = false
while (changes.length > 0) {
if (chunk.getChanges().length >= limits.maxChunkChanges) {
break
}
const change = changes[0]
const changeBytes = countChangeBytes(change)
if (totalBytes + changeBytes > limits.maxChunkChangeBytes) {
break
}
for (const operation of change.iterativelyApplyTo(currentSnapshot, {
strict: true,
})) {
await validateContentHash(operation)
}
chunk.pushChanges([change])
changes.shift()
totalBytes += changeBytes
changesPushed = true
}
return changesPushed
}
/**
* Check that the operation is valid and can be incorporated to the history.
*
* For now, this checks content hashes when they are provided.
*
* @param {core.Operation} operation
*/
async function validateContentHash(operation) {
if (operation instanceof core.EditFileOperation) {
const editOperation = operation.getOperation()
if (
editOperation instanceof core.TextOperation &&
editOperation.contentHash != null
) {
const path = operation.getPathname()
const file = currentSnapshot.getFile(path)
if (file == null) {
throw new InvalidChangeError('file not found for hash validation', {
projectId,
path,
})
}
await file.load('eager', blobStore)
const content = file.getContent({ filterTrackedDeletes: true })
const expectedHash = editOperation.contentHash
const actualHash = content != null ? getContentHash(content) : null
logger.debug({ expectedHash, actualHash }, 'validating content hash')
if (actualHash !== expectedHash) {
throw new InvalidChangeError('content hash mismatch', {
projectId,
path,
expectedHash,
actualHash,
})
}
// Remove the content hash from the change before storing it in the chunk.
// It was only useful for validation.
editOperation.contentHash = null
}
}
}
async function extendLastChunkIfPossible() {
const latestChunk = await chunkStore.loadLatest(projectId)
currentChunk = latestChunk
originalEndVersion = latestChunk.getEndVersion()
if (originalEndVersion !== clientEndVersion) {
throw new Chunk.ConflictingEndVersion(
clientEndVersion,
originalEndVersion
)
}
currentSnapshot = latestChunk.getSnapshot().clone()
const timer = new Timer()
currentSnapshot.applyAll(latestChunk.getChanges())
const changesPushed = await fillChunk(currentChunk, changesToPersist)
if (!changesPushed) {
return
}
checkElapsedTime(timer)
await chunkStore.update(
projectId,
originalEndVersion,
currentChunk,
earliestChangeTimestamp
)
}
async function createNewChunksAsNeeded() {
while (changesToPersist.length > 0) {
const endVersion = currentChunk.getEndVersion()
const history = new History(currentSnapshot.clone(), [])
const chunk = new Chunk(history, endVersion)
const timer = new Timer()
const changesPushed = await fillChunk(chunk, changesToPersist)
if (changesPushed) {
checkElapsedTime(timer)
currentChunk = chunk
await chunkStore.create(projectId, chunk, earliestChangeTimestamp)
} else {
throw new Error('failed to fill empty chunk')
}
}
}
function isOlderThanMinChangeTimestamp(change) {
return change.getTimestamp().getTime() < limits.minChangeTimestamp
}
function isOlderThanMaxChangeTimestamp(change) {
return change.getTimestamp().getTime() < limits.maxChangeTimestamp
}
const oldChanges = _.filter(allChanges, isOlderThanMinChangeTimestamp)
const anyTooOld = _.some(oldChanges, isOlderThanMaxChangeTimestamp)
const tooManyChanges = oldChanges.length > limits.maxChanges
const tooManyBytes = totalChangeBytes(oldChanges) > limits.maxChangeBytes
if (anyTooOld || tooManyChanges || tooManyBytes) {
changesToPersist = oldChanges
const numberOfChangesToPersist = oldChanges.length
await extendLastChunkIfPossible()
await createNewChunksAsNeeded()
return {
numberOfChangesPersisted: numberOfChangesToPersist,
originalEndVersion,
currentChunk,
}
} else {
return null
}
}
module.exports = persistChanges
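// Example (illustrative sketch, with made-up limits): persist the changes that
// are more than a minute old once any change is more than five minutes old (or
// the defaults for maxChanges/maxChangeBytes are exceeded); otherwise the
// function returns null and the changes stay buffered.
//
//   const result = await persistChanges(projectId, changes, {
//     minChangeTimestamp: Date.now() - 60 * 1000,
//     maxChangeTimestamp: Date.now() - 5 * 60 * 1000,
//   }, clientEndVersion)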

View File

@@ -0,0 +1,27 @@
const _ = require('lodash')
const config = require('config')
const metrics = require('@overleaf/metrics')
const objectPersistor = require('@overleaf/object-persistor')
const persistorConfig = _.cloneDeep(config.get('persistor'))
function convertKey(key, convertFn) {
if (_.has(persistorConfig, key)) {
_.update(persistorConfig, key, convertFn)
}
}
convertKey('s3.signedUrlExpiryInMs', s => parseInt(s, 10))
convertKey('s3.httpOptions.timeout', s => parseInt(s, 10))
convertKey('s3.maxRetries', s => parseInt(s, 10))
convertKey('s3.pathStyle', s => s === 'true')
convertKey('gcs.unlockBeforeDelete', s => s === 'true')
convertKey('gcs.unsignedUrls', s => s === 'true')
convertKey('gcs.signedUrlExpiryInMs', s => parseInt(s, 10))
convertKey('gcs.deleteConcurrency', s => parseInt(s, 10))
convertKey('gcs.retryOptions.maxRetries', s => parseInt(s, 10))
convertKey('fallback.buckets', s => JSON.parse(s || '{}'))
persistorConfig.Metrics = metrics
module.exports = objectPersistor(persistorConfig)

View File

@@ -0,0 +1,140 @@
// @ts-check
'use strict'
/**
* @import { Snapshot } from 'overleaf-editor-core'
* @import { BlobStore } from '../../storage/lib/blob_store/index'
*/
const Archive = require('archiver')
const BPromise = require('bluebird')
const fs = require('node:fs')
const { pipeline } = require('node:stream')
const core = require('overleaf-editor-core')
const Snapshot = core.Snapshot
const OError = require('@overleaf/o-error')
const assert = require('./assert')
// The maximum safe concurrency appears to be 1.
// https://github.com/overleaf/issues/issues/1909
const FETCH_CONCURRENCY = 1 // number of files to fetch at once
const DEFAULT_ZIP_TIMEOUT = 25000 // ms
class DownloadError extends OError {
constructor(hash) {
super(`ProjectArchive: blob download failed: ${hash}`, { hash })
}
}
class ArchiveTimeout extends OError {
constructor() {
super('ProjectArchive timed out')
}
}
class MissingfileError extends OError {
constructor() {
super('ProjectArchive: attempting to look up a file that does not exist')
}
}
class ProjectArchive {
static ArchiveTimeout = ArchiveTimeout
static MissingfileError = MissingfileError
static DownloadError = DownloadError
/**
* @constructor
* @param {Snapshot} snapshot
* @param {number} [timeout] in ms
* @classdesc
* Writes the project snapshot to a zip file.
*/
constructor(snapshot, timeout) {
assert.instance(snapshot, Snapshot)
this.snapshot = snapshot
this.timeout = timeout || DEFAULT_ZIP_TIMEOUT
}
/**
* Write zip archive to the given file path.
*
* @param {BlobStore} blobStore
* @param {string} zipFilePath
*/
writeZip(blobStore, zipFilePath) {
const snapshot = this.snapshot
const timeout = this.timeout
const startTime = process.hrtime()
const archive = new Archive('zip')
// Convert elapsed seconds and nanoseconds to milliseconds.
function findElapsedMilliseconds() {
const elapsed = process.hrtime(startTime)
return elapsed[0] * 1e3 + elapsed[1] * 1e-6
}
function addFileToArchive(pathname) {
if (findElapsedMilliseconds() > timeout) {
throw new ProjectArchive.ArchiveTimeout()
}
const file = snapshot.getFile(pathname)
if (!file) {
throw new ProjectArchive.MissingfileError()
}
return file.load('eager', blobStore).then(function () {
const content = file.getContent({ filterTrackedDeletes: true })
if (content === null) {
return streamFileToArchive(pathname, file).catch(function (err) {
throw new ProjectArchive.DownloadError(file.getHash()).withCause(
err
)
})
} else {
archive.append(content, { name: pathname })
}
})
}
function streamFileToArchive(pathname, file) {
return new BPromise(function (resolve, reject) {
blobStore
.getStream(file.getHash())
.then(stream => {
stream.on('error', reject)
stream.on('end', resolve)
archive.append(stream, { name: pathname })
})
.catch(reject)
})
}
const addFilesToArchiveAndFinalize = BPromise.map(
snapshot.getFilePathnames(),
addFileToArchive,
{ concurrency: FETCH_CONCURRENCY }
).then(function () {
archive.finalize()
})
const streamArchiveToFile = new BPromise(function (resolve, reject) {
const stream = fs.createWriteStream(zipFilePath)
pipeline(archive, stream, function (err) {
if (err) {
reject(err)
} else {
resolve()
}
})
})
return BPromise.join(streamArchiveToFile, addFilesToArchiveAndFinalize)
}
}
module.exports = ProjectArchive
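// Example (illustrative sketch; BlobStore is assumed to come from
// ./blob_store): write a snapshot to a local zip file with a 30s timeout.
//
//   const archive = new ProjectArchive(snapshot, 30000)
//   await archive.writeZip(new BlobStore(projectId), '/tmp/project.zip')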

View File

@@ -0,0 +1,24 @@
// Keep in sync with services/web/app/src/Features/History/project_key.js
const _ = require('lodash')
const path = require('node:path')
//
// The advice in http://docs.aws.amazon.com/AmazonS3/latest/dev/
// request-rate-perf-considerations.html is to avoid sequential key prefixes,
// so we reverse the project ID part of the key as they suggest.
//
function format(projectId) {
const prefix = naiveReverse(pad(projectId))
return path.join(prefix.slice(0, 3), prefix.slice(3, 6), prefix.slice(6))
}
function pad(number) {
return _.padStart(number, 9, '0')
}
function naiveReverse(string) {
return string.split('').reverse().join('')
}
exports.format = format
exports.pad = pad
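// Example (illustrative): a short id is zero-padded to 9 characters, reversed,
// and split into three path segments.
//
//   format('1234') // => '432/100/000' (pad -> '000001234', reverse -> '432100000')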

View File

@@ -0,0 +1,19 @@
const config = require('config')
const redis = require('@overleaf/redis-wrapper')
const historyRedisOptions = config.get('redis.history')
const rclientHistory = redis.createClient(historyRedisOptions)
const lockRedisOptions = config.get('redis.history')
const rclientLock = redis.createClient(lockRedisOptions)
async function disconnect() {
await Promise.all([rclientHistory.disconnect(), rclientLock.disconnect()])
}
module.exports = {
rclientHistory,
rclientLock,
redis,
disconnect,
}

View File

@@ -0,0 +1,40 @@
// @ts-check
/**
* Promises are promises and streams are streams, and ne'er the twain shall
* meet.
* @module
*/
'use strict'
const Stream = require('node:stream')
const zlib = require('node:zlib')
const { WritableBuffer } = require('@overleaf/stream-utils')
/**
* Create a promise for the result of reading a stream to a buffer.
*
* @param {Stream.Readable} readStream
* @return {Promise<Buffer>}
*/
async function readStreamToBuffer(readStream) {
const bufferStream = new WritableBuffer()
await Stream.promises.pipeline(readStream, bufferStream)
return bufferStream.contents()
}
exports.readStreamToBuffer = readStreamToBuffer
/**
* Create a promise for the result of un-gzipping a stream to a buffer.
*
* @param {NodeJS.ReadableStream} readStream
* @return {Promise<Buffer>}
*/
async function gunzipStreamToBuffer(readStream) {
const gunzip = zlib.createGunzip()
const bufferStream = new WritableBuffer()
await Stream.promises.pipeline(readStream, gunzip, bufferStream)
return bufferStream.contents()
}
exports.gunzipStreamToBuffer = gunzipStreamToBuffer
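// Example (illustrative sketch; the persistor, bucket, and key are assumed to
// point at a gzipped JSON object): read and decompress an object stream in one
// step.
//
//   const stream = await persistor.getObjectStream(bucket, key)
//   const buf = await gunzipStreamToBuffer(stream)
//   const obj = JSON.parse(buf.toString('utf-8'))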

View File

@@ -0,0 +1,25 @@
/*
* Taken from renderer/app/helpers/temp.js with minor cosmetic changes.
* Promisify the temp package. The temp package provides a 'track' feature
* that automatically cleans up temp files at process exit, but that is not
* very useful. They also provide a method to trigger cleanup, but that is not
* safe for concurrent use. So, we use a disposer to unlink the file.
*/
const BPromise = require('bluebird')
const fs = BPromise.promisifyAll(require('node:fs'))
const temp = BPromise.promisifyAll(require('temp'))
exports.open = function (affixes) {
return temp.openAsync(affixes).disposer(function (fileInfo) {
fs.closeAsync(fileInfo.fd)
.then(() => {
return fs.unlinkAsync(fileInfo.path)
})
.catch(function (err) {
if (err.code !== 'ENOENT') {
throw err
}
})
})
}
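// Example (illustrative sketch): the disposer closes and unlinks the temp file
// when the `using` block finishes, even if it throws.
//
//   await BPromise.using(exports.open('zip'), async tempFileInfo => {
//     // write to tempFileInfo.path here
//   })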

View File

@@ -0,0 +1,134 @@
'use strict'
const BPromise = require('bluebird')
const config = require('config')
const fs = require('node:fs')
const path = require('node:path')
const OError = require('@overleaf/o-error')
const objectPersistor = require('@overleaf/object-persistor')
const assert = require('./assert')
const { BlobStore } = require('./blob_store')
const persistor = require('./persistor')
const ProjectArchive = require('./project_archive')
const projectKey = require('./project_key')
const temp = require('./temp')
const BUCKET = config.get('zipStore.bucket')
function getZipKey(projectId, version) {
return path.join(
projectKey.format(projectId),
version.toString(),
'project.zip'
)
}
/**
* Store a zip of a given version of a project in bucket.
*
* @class
*/
class ZipStore {
/**
* Generate signed link to access the zip file.
*
* @param {number | string} projectId
* @param {number} version
* @return {string}
*/
async getSignedUrl(projectId, version) {
assert.projectId(projectId, 'bad projectId')
assert.integer(version, 'bad version')
const key = getZipKey(projectId, version)
return await persistor.getRedirectUrl(BUCKET, key)
}
/**
* Generate a zip of the given snapshot.
*
* @param {number | string} projectId
* @param {number} version
* @param {Snapshot} snapshot
*/
async storeZip(projectId, version, snapshot) {
assert.projectId(projectId, 'bad projectId')
assert.integer(version, 'bad version')
assert.object(snapshot, 'bad snapshot')
const zipKey = getZipKey(projectId, version)
if (await isZipPresent()) return
await BPromise.using(temp.open('zip'), async tempFileInfo => {
await zipSnapshot(tempFileInfo.path, snapshot)
await uploadZip(tempFileInfo.path)
})
// If the file is already there, we don't need to build the zip again. If we
// just HEAD the file, there's a race condition, because the zip files
// automatically expire. So, we try to copy the file from itself to itself,
// and if it fails, we know the file didn't exist. If it succeeds, this has
// the effect of re-extending its lifetime.
async function isZipPresent() {
try {
await persistor.copyObject(BUCKET, zipKey, zipKey)
return true
} catch (error) {
if (!(error instanceof objectPersistor.Errors.NotFoundError)) {
console.error(
'storeZip: isZipPresent: unexpected error (except in dev): %s',
error
)
}
return false
}
}
async function zipSnapshot(tempPathname, snapshot) {
const blobStore = new BlobStore(projectId)
const zipTimeoutMs = parseInt(config.get('zipStore.zipTimeoutMs'), 10)
const archive = new ProjectArchive(snapshot, zipTimeoutMs)
try {
await archive.writeZip(blobStore, tempPathname)
} catch (err) {
throw new ZipStore.CreationError(projectId, version).withCause(err)
}
}
async function uploadZip(tempPathname) {
const stream = fs.createReadStream(tempPathname)
try {
await persistor.sendStream(BUCKET, zipKey, stream, {
contentType: 'application/zip',
})
} catch (err) {
throw new ZipStore.UploadError(projectId, version).withCause(err)
}
}
}
}
class CreationError extends OError {
constructor(projectId, version) {
super(`Zip creation failed for ${projectId} version ${version}`, {
projectId,
version,
})
}
}
ZipStore.CreationError = CreationError
class UploadError extends OError {
constructor(projectId, version) {
super(`Zip upload failed for ${projectId} version ${version}`, {
projectId,
version,
})
}
}
ZipStore.UploadError = UploadError
module.exports = new ZipStore()
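// Illustrative usage sketch (not part of the original module, never invoked):
// build or refresh the zip for a version, then hand out a signed URL. The
// caller and its arguments are assumptions for the example only.
// eslint-disable-next-line no-unused-vars
async function exampleZipStoreUsage(projectId, version, snapshot) {
  await module.exports.storeZip(projectId, version, snapshot)
  return await module.exports.getSignedUrl(projectId, version)
}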

File diff suppressed because it is too large

View File

@@ -0,0 +1,647 @@
// @ts-check
import Events from 'node:events'
import fs from 'node:fs'
import Stream from 'node:stream'
import { ObjectId } from 'mongodb'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import { Blob } from 'overleaf-editor-core'
import {
BlobStore,
getStringLengthOfFile,
GLOBAL_BLOBS,
makeBlobForFile,
} from '../lib/blob_store/index.js'
import { db } from '../lib/mongodb.js'
import commandLineArgs from 'command-line-args'
import readline from 'node:readline'
import { _blobIsBackedUp, backupBlob } from '../lib/backupBlob.mjs'
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
import filestorePersistor from '../lib/persistor.js'
import { setTimeout } from 'node:timers/promises'
// Silence warning.
Events.setMaxListeners(20)
// Enable caching for ObjectId.toString()
ObjectId.cacheHexString = true
/**
* @typedef {import("mongodb").Collection} Collection
* @typedef {import("mongodb").Collection<Project>} ProjectsCollection
* @typedef {import("mongodb").Collection<{project: Project}>} DeletedProjectsCollection
*/
/**
* @typedef {Object} FileRef
* @property {ObjectId} _id
* @property {string} hash
*/
/**
* @typedef {Object} Folder
* @property {Array<Folder>} folders
* @property {Array<FileRef>} fileRefs
*/
/**
* @typedef {Object} Project
* @property {ObjectId} _id
* @property {Array<Folder>} rootFolder
* @property {{history: {id: (number|string)}}} overleaf
*/
/**
* @return {{FIX_NOT_FOUND: boolean, FIX_HASH_MISMATCH: boolean, FIX_DELETE_PERMISSION: boolean, FIX_MISSING_HASH: boolean, LOGS: string}}
*/
function parseArgs() {
const args = commandLineArgs([
{ name: 'fixNotFound', type: String, defaultValue: 'true' },
{ name: 'fixDeletePermission', type: String, defaultValue: 'true' },
{ name: 'fixHashMismatch', type: String, defaultValue: 'true' },
{ name: 'fixMissingHash', type: String, defaultValue: 'true' },
{ name: 'logs', type: String, defaultValue: '' },
])
/**
* commandLineArgs cannot handle --foo=false, so go the long way
* @param {string} name
* @return {boolean}
*/
function boolVal(name) {
const v = args[name]
if (['true', 'false'].includes(v)) return v === 'true'
throw new Error(`expected "true" or "false" for boolean option ${name}`)
}
return {
FIX_NOT_FOUND: boolVal('fixNotFound'),
FIX_DELETE_PERMISSION: boolVal('fixDeletePermission'),
FIX_HASH_MISMATCH: boolVal('fixHashMismatch'),
FIX_MISSING_HASH: boolVal('fixMissingHash'),
LOGS: args.logs,
}
}
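// Illustrative invocation (the script path is an assumption; the flags are the
// options defined in parseArgs above, and --logs points at the file read by
// processLog below):
//
//   node storage/scripts/back_fill_file_hash_fix_up.mjs \
//     --logs=/var/log/file-hash-errors.log --fixMissingHash=false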
const {
FIX_HASH_MISMATCH,
FIX_DELETE_PERMISSION,
FIX_NOT_FOUND,
FIX_MISSING_HASH,
LOGS,
} = parseArgs()
if (!LOGS) {
throw new Error('--logs parameter missing')
}
const BUFFER_DIR = fs.mkdtempSync(
process.env.BUFFER_DIR_PREFIX || '/tmp/back_fill_file_hash-'
)
const USER_FILES_BUCKET_NAME = process.env.USER_FILES_BUCKET_NAME || ''
if (!USER_FILES_BUCKET_NAME) {
throw new Error('env var USER_FILES_BUCKET_NAME is missing')
}
// https://nodejs.org/api/stream.html#streamgetdefaulthighwatermarkobjectmode
const STREAM_HIGH_WATER_MARK = parseInt(
process.env.STREAM_HIGH_WATER_MARK || (64 * 1024).toString(),
10
)
const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10)
/** @type {ProjectsCollection} */
const projectsCollection = db.collection('projects')
/** @type {DeletedProjectsCollection} */
const deletedProjectsCollection = db.collection('deletedProjects')
let gracefulShutdownInitiated = false
process.on('SIGINT', handleSignal)
process.on('SIGTERM', handleSignal)
function handleSignal() {
gracefulShutdownInitiated = true
console.warn('graceful shutdown initiated, draining queue')
}
class FileDeletedError extends OError {}
/** @type {Map<string,{project: Project, projectSoftDeleted: boolean}>} */
const PROJECT_CACHE = new Map()
/**
* @param {string} projectId
* @return {Promise<{project: Project, projectSoftDeleted: boolean}>}
*/
async function getProject(projectId) {
const cached = PROJECT_CACHE.get(projectId)
if (cached) return cached
let projectSoftDeleted
let project = await projectsCollection.findOne({
_id: new ObjectId(projectId),
})
if (project) {
projectSoftDeleted = false
} else {
const softDeleted = await deletedProjectsCollection.findOne({
'deleterData.deletedProjectId': new ObjectId(projectId),
project: { $exists: true },
})
if (!softDeleted) {
throw new OError('project hard-deleted')
}
project = softDeleted.project
projectSoftDeleted = true
}
PROJECT_CACHE.set(projectId, { projectSoftDeleted, project })
return { projectSoftDeleted, project }
}
/**
* @param {Folder} folder
* @param {string} fileId
* @return {{path: string, fileRef: FileRef, folder: Folder}|null}
*/
function getFileTreePath(folder, fileId) {
if (!folder) return null
let idx = 0
if (Array.isArray(folder.fileRefs)) {
for (const fileRef of folder.fileRefs) {
if (fileRef?._id.toString() === fileId) {
return {
fileRef,
path: `.fileRefs.${idx}`,
folder,
}
}
idx++
}
}
idx = 0
if (Array.isArray(folder.folders)) {
for (const child of folder.folders) {
const match = getFileTreePath(child, fileId)
if (match) {
return {
fileRef: match.fileRef,
folder: match.folder,
path: `.folders.${idx}${match.path}`,
}
}
idx++
}
}
return null
}
/**
* @param {string} projectId
* @param {string} fileId
* @return {Promise<{fileRef: FileRef, folder: Folder, fullPath: string, query: Object, projectSoftDeleted: boolean}>}
*/
async function findFile(projectId, fileId) {
const { projectSoftDeleted, project } = await getProject(projectId)
const match = getFileTreePath(project.rootFolder[0], fileId)
if (!match) {
throw new FileDeletedError('file not found in file-tree', {
projectSoftDeleted,
})
}
const { path, fileRef, folder } = match
let fullPath
let query
if (projectSoftDeleted) {
fullPath = `project.rootFolder.0${path}`
query = {
'deleterData.deletedProjectId': new ObjectId(projectId),
[`${fullPath}._id`]: new ObjectId(fileId),
}
} else {
fullPath = `rootFolder.0${path}`
query = {
_id: new ObjectId(projectId),
[`${fullPath}._id`]: new ObjectId(fileId),
}
}
return {
projectSoftDeleted,
query,
fullPath,
fileRef,
folder,
}
}
/**
* @param {string} line
* @return {Promise<boolean>}
*/
async function fixNotFound(line) {
const { projectId, fileId, bucketName } = JSON.parse(line)
if (bucketName !== USER_FILES_BUCKET_NAME) {
throw new OError('not found case for another bucket')
}
const { projectSoftDeleted, query, fullPath, fileRef, folder } =
await findFile(projectId, fileId)
logger.info({ projectId, fileId, fileRef }, 'removing fileRef')
// Copied from _removeElementFromMongoArray (https://github.com/overleaf/internal/blob/11e09528c153de6b7766d18c3c90d94962190371/services/web/app/src/Features/Project/ProjectEntityMongoUpdateHandler.js)
const nonArrayPath = fullPath.slice(0, fullPath.lastIndexOf('.'))
let result
if (projectSoftDeleted) {
result = await deletedProjectsCollection.updateOne(query, {
$pull: { [nonArrayPath]: { _id: new ObjectId(fileId) } },
$inc: { 'project.version': 1 },
})
} else {
result = await projectsCollection.updateOne(query, {
$pull: { [nonArrayPath]: { _id: new ObjectId(fileId) } },
$inc: { version: 1 },
})
}
if (result.matchedCount !== 1) {
throw new OError('file-tree write did not match', { result })
}
// Update the cache. The mongo-path of the next file will be off otherwise.
folder.fileRefs = folder.fileRefs.filter(f => !f._id.equals(fileId))
return true
}
/**
* @param {string} projectId
* @param {string} fileId
* @param {string} hash
* @return {Promise<void>}
*/
async function setHashInMongo(projectId, fileId, hash) {
const { projectSoftDeleted, query, fullPath, fileRef } = await findFile(
projectId,
fileId
)
if (fileRef.hash === hash) return
logger.info({ projectId, fileId, fileRef, hash }, 'setting fileRef hash')
let result
if (projectSoftDeleted) {
result = await deletedProjectsCollection.updateOne(query, {
$set: { [`${fullPath}.hash`]: hash },
$inc: { 'project.version': 1 },
})
} else {
result = await projectsCollection.updateOne(query, {
$set: { [`${fullPath}.hash`]: hash },
$inc: { version: 1 },
})
}
if (result.matchedCount !== 1) {
throw new OError('file-tree write did not match', { result })
}
fileRef.hash = hash // Update cache for completeness.
}
/**
* @param {string} projectId
* @param {string} fileId
* @param {string} historyId
* @return {Promise<void>}
*/
async function importRestoredFilestoreFile(projectId, fileId, historyId) {
const filestoreKey = `${projectId}/${fileId}`
const path = `${BUFFER_DIR}/${projectId}_${fileId}`
try {
let s
try {
s = await filestorePersistor.getObjectStream(
USER_FILES_BUCKET_NAME,
filestoreKey
)
} catch (err) {
if (err instanceof NotFoundError) {
throw new OError('missing blob, need to restore filestore file', {
filestoreKey,
})
}
throw err
}
await Stream.promises.pipeline(
s,
fs.createWriteStream(path, { highWaterMark: STREAM_HIGH_WATER_MARK })
)
const blobStore = new BlobStore(historyId)
const blob = await blobStore.putFile(path)
await backupBlob(historyId, blob, path)
await setHashInMongo(projectId, fileId, blob.getHash())
} finally {
await fs.promises.rm(path, { force: true })
}
}
/**
* @param {string} projectId
* @param {string} fileId
* @param {string} path
* @return {Promise<Blob>}
*/
async function bufferFilestoreFileToDisk(projectId, fileId, path) {
const filestoreKey = `${projectId}/${fileId}`
try {
await Stream.promises.pipeline(
await filestorePersistor.getObjectStream(
USER_FILES_BUCKET_NAME,
filestoreKey
),
fs.createWriteStream(path, { highWaterMark: STREAM_HIGH_WATER_MARK })
)
const blob = await makeBlobForFile(path)
blob.setStringLength(
await getStringLengthOfFile(blob.getByteLength(), path)
)
return blob
} catch (err) {
if (err instanceof NotFoundError) {
throw new OError('missing blob, need to restore filestore file', {
filestoreKey,
})
}
throw err
}
}
/**
* @param {string} projectId
* @param {string} fileId
* @return {Promise<string>}
*/
async function computeFilestoreFileHash(projectId, fileId) {
const path = `${BUFFER_DIR}/${projectId}_${fileId}`
try {
const blob = await bufferFilestoreFileToDisk(projectId, fileId, path)
return blob.getHash()
} finally {
await fs.promises.rm(path, { force: true })
}
}
/**
* @param {string} projectId
* @param {string} fileId
* @return {Promise<void>}
*/
async function uploadFilestoreFile(projectId, fileId) {
const path = `${BUFFER_DIR}/${projectId}_${fileId}`
try {
const blob = await bufferFilestoreFileToDisk(projectId, fileId, path)
const hash = blob.getHash()
try {
await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
} catch (err) {
if (!(err instanceof Blob.NotFoundError)) throw err
const { project } = await getProject(projectId)
const historyId = project.overleaf.history.id.toString()
const blobStore = new BlobStore(historyId)
await blobStore.putBlob(path, blob)
await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
}
} finally {
await fs.promises.rm(path, { force: true })
}
}
/**
* @param {string} line
* @return {Promise<boolean>}
*/
async function fixHashMismatch(line) {
const {
projectId,
fileId,
hash: computedHash,
entry: {
hash: fileTreeHash,
ctx: { historyId },
},
} = JSON.parse(line)
const blobStore = new BlobStore(historyId)
if (await blobStore.getBlob(fileTreeHash)) {
throw new OError('found blob matching the file-tree hash')
}
if (!(await blobStore.getBlob(computedHash))) {
await importRestoredFilestoreFile(projectId, fileId, historyId)
return true
}
return await ensureBlobExistsForFileAndUploadToAWS(
projectId,
fileId,
computedHash
)
}
/**
* @param {string} projectId
* @param {string} fileId
* @param {string} hash
* @return {Promise<boolean>}
*/
async function hashAlreadyUpdatedInFileTree(projectId, fileId, hash) {
const { fileRef } = await findFile(projectId, fileId)
return fileRef.hash === hash
}
/**
* @param {string} projectId
* @param {string} hash
* @return {Promise<boolean>}
*/
async function needsBackingUpToAWS(projectId, hash) {
if (GLOBAL_BLOBS.has(hash)) return false
return !(await _blobIsBackedUp(projectId, hash))
}
/**
* @param {string} projectId
* @param {string} fileId
* @param {string} hash
* @return {Promise<boolean>}
*/
async function ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash) {
const { project } = await getProject(projectId)
const historyId = project.overleaf.history.id.toString()
const blobStore = new BlobStore(historyId)
if (
(await hashAlreadyUpdatedInFileTree(projectId, fileId, hash)) &&
(await blobStore.getBlob(hash)) &&
!(await needsBackingUpToAWS(projectId, hash))
) {
return false // already processed
}
const stream = await blobStore.getStream(hash)
const path = `${BUFFER_DIR}/${historyId}_${hash}`
try {
await Stream.promises.pipeline(
stream,
fs.createWriteStream(path, {
highWaterMark: STREAM_HIGH_WATER_MARK,
})
)
const writtenBlob = await makeBlobForFile(path)
writtenBlob.setStringLength(
await getStringLengthOfFile(writtenBlob.getByteLength(), path)
)
if (writtenBlob.getHash() !== hash) {
// Double check download, better safe than sorry.
throw new OError('blob corrupted', { writtenBlob })
}
let blob = await blobStore.getBlob(hash)
if (!blob) {
// Calling blobStore.putBlob would result in the same error again.
// HACK: Skip upload to GCS and finalize putBlob operation directly.
await blobStore.backend.insertBlob(historyId, writtenBlob)
}
await backupBlob(historyId, writtenBlob, path)
} finally {
await fs.promises.rm(path, { force: true })
}
await setHashInMongo(projectId, fileId, hash)
return true
}
/**
* @param {string} line
* @return {Promise<boolean>}
*/
async function fixDeletePermission(line) {
let { projectId, fileId, hash } = JSON.parse(line)
if (!hash) hash = await computeFilestoreFileHash(projectId, fileId)
return await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
}
/**
* @param {string} line
* @return {Promise<boolean>}
*/
async function fixMissingHash(line) {
let { projectId, _id: fileId } = JSON.parse(line)
const {
fileRef: { hash },
} = await findFile(projectId, fileId)
if (hash) {
// processed, double check
return await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
}
await uploadFilestoreFile(projectId, fileId)
return true
}
const CASES = {
'not found': {
match: 'NotFoundError',
flag: FIX_NOT_FOUND,
action: fixNotFound,
},
'hash mismatch': {
match: 'OError: hash mismatch',
flag: FIX_HASH_MISMATCH,
action: fixHashMismatch,
},
'delete permission': {
match: 'storage.objects.delete',
flag: FIX_DELETE_PERMISSION,
action: fixDeletePermission,
},
'missing file hash': {
match: '"bad file hash"',
flag: FIX_MISSING_HASH,
action: fixMissingHash,
},
}
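// Illustrative shape of a log line this script can act on (the field names
// follow the JSON.parse destructuring in the handlers above; the values and
// the exact error text are invented for the example):
//
//   {"msg":"failed to process file","err":"NotFoundError: no such object",
//    "projectId":"...","fileId":"...","bucketName":"...","hash":"...",
//    "entry":{"hash":"...","ctx":{"historyId":"..."}}}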
const STATS = {
processedLines: 0,
success: 0,
alreadyProcessed: 0,
fileDeleted: 0,
skipped: 0,
failed: 0,
unmatched: 0,
}
function logStats() {
console.log(
JSON.stringify({
time: new Date(),
gracefulShutdownInitiated,
...STATS,
})
)
}
setInterval(logStats, 10_000)
async function processLog() {
const rl = readline.createInterface({
input: fs.createReadStream(LOGS),
})
nextLine: for await (const line of rl) {
if (gracefulShutdownInitiated) break
STATS.processedLines++
if (
!(
line.includes('"failed to process file"') ||
// Process missing hashes as flagged by find_malformed_filetrees.mjs
line.includes('"bad file-tree path"')
)
) {
continue
}
for (const [name, { match, flag, action }] of Object.entries(CASES)) {
if (!line.includes(match)) continue
if (flag) {
try {
if (await action(line)) {
STATS.success++
} else {
STATS.alreadyProcessed++
}
} catch (err) {
if (err instanceof FileDeletedError) {
STATS.fileDeleted++
logger.info({ err, line }, 'file deleted, skipping')
} else {
STATS.failed++
logger.error({ err, line }, `failed to fix ${name}`)
}
}
} else {
STATS.skipped++
}
continue nextLine
}
STATS.unmatched++
logger.warn({ line }, 'unknown fatal error')
}
}
async function main() {
try {
await processLog()
} finally {
logStats()
try {
await fs.promises.rm(BUFFER_DIR, { recursive: true, force: true })
} catch (err) {
console.error(`Cleanup of BUFFER_DIR=${BUFFER_DIR} failed`, err)
}
}
const { skipped, failed, unmatched } = STATS
await setTimeout(SLEEP_BEFORE_EXIT)
if (failed > 0) {
process.exit(Math.min(failed, 99))
} else if (unmatched > 0) {
process.exit(100)
} else if (skipped > 0) {
process.exit(101)
} else {
process.exit(0)
}
}
await main()

File diff suppressed because it is too large

View File

@@ -0,0 +1,173 @@
// @ts-check
import commandLineArgs from 'command-line-args'
import { backupBlob, downloadBlobToDir } from '../lib/backupBlob.mjs'
import withTmpDir from '../../api/controllers/with_tmp_dir.js'
import {
BlobStore,
GLOBAL_BLOBS,
loadGlobalBlobs,
} from '../lib/blob_store/index.js'
import assert from '../lib/assert.js'
import knex from '../lib/knex.js'
import { client } from '../lib/mongodb.js'
import redis from '../lib/redis.js'
import { setTimeout } from 'node:timers/promises'
import fs from 'node:fs'
await loadGlobalBlobs()
/**
* Gracefully shutdown the process
* @return {Promise<void>}
*/
async function gracefulShutdown() {
console.log('Gracefully shutting down')
await knex.destroy()
await client.close()
await redis.disconnect()
await setTimeout(100)
process.exit()
}
/**
*
* @param {string} row
* @return {BackupBlobJob}
*/
function parseCSVRow(row) {
const [historyId, hash] = row.split(',')
validateBackedUpBlobJob({ historyId, hash })
return { historyId, hash }
}
/**
*
* @param {BackupBlobJob} job
*/
function validateBackedUpBlobJob(job) {
assert.projectId(job.historyId)
assert.blobHash(job.hash)
}
/**
*
* @param {string} path
* @return {Promise<Array<BackupBlobJob>>}
*/
async function readCSV(path) {
let fh
/** @type {Array<BackupBlobJob>} */
const rows = []
try {
fh = await fs.promises.open(path, 'r')
} catch (error) {
console.error(`Could not open file: ${error}`)
throw error
}
for await (const line of fh.readLines()) {
try {
const row = parseCSVRow(line)
if (GLOBAL_BLOBS.has(row.hash)) {
console.log(`Skipping global blob: ${line}`)
continue
}
rows.push(row)
} catch (error) {
console.error(error instanceof Error ? error.message : error)
console.log(`Skipping invalid row: ${line}`)
}
}
return rows
}
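// Illustrative --input file format (one "historyId,hash" pair per line, as
// split by parseCSVRow above; the ids and hashes are invented):
//
//   5f0c9d8e2a1b3c4d5e6f7a8b,0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33
//   1234567890abcdef12345678,da39a3ee5e6b4b0d3255bfef95601890afd80709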
/**
* @typedef {Object} BackupBlobJob
* @property {string} hash
* @property {string} historyId
*/
/**
* @param {Object} options
* @param {string} [options.historyId]
* @param {string} [options.hash]
* @param {string} [options.input]
* @return {Promise<Array<BackupBlobJob>>}
*/
async function initialiseJobs({ historyId, hash, input }) {
if (input) {
return await readCSV(input)
}
if (!historyId) {
console.error('historyId is required')
process.exitCode = 1
await gracefulShutdown()
}
if (!hash) {
console.error('hash is required')
process.exitCode = 1
await gracefulShutdown()
}
validateBackedUpBlobJob({ historyId, hash })
if (GLOBAL_BLOBS.has(hash)) {
console.error(`Blob ${hash} is a global blob; not backing up`)
process.exitCode = 1
await gracefulShutdown()
}
return [{ hash, historyId }]
}
/**
*
* @param {string} historyId
* @param {string} hash
* @return {Promise<void>}
*/
export async function downloadAndBackupBlob(historyId, hash) {
const blobStore = new BlobStore(historyId)
const blob = await blobStore.getBlob(hash)
if (!blob) {
throw new Error(`Blob ${hash} could not be loaded`)
}
await withTmpDir(`blob-${hash}`, async tmpDir => {
const filePath = await downloadBlobToDir(historyId, blob, tmpDir)
console.log(`Downloaded blob ${hash} to ${filePath}`)
await backupBlob(historyId, blob, filePath)
console.log('Backed up blob')
})
}
let jobs
const options = commandLineArgs([
{ name: 'historyId', type: String },
{ name: 'hash', type: String },
{ name: 'input', type: String },
])
try {
jobs = await initialiseJobs(options)
} catch (error) {
console.error(error)
await gracefulShutdown()
}
if (!Array.isArray(jobs)) {
// This is mostly to satisfy typescript
process.exitCode = 1
await gracefulShutdown()
process.exit(1)
}
for (const { historyId, hash } of jobs) {
try {
await downloadAndBackupBlob(historyId, hash)
} catch (error) {
console.error(error)
process.exitCode = 1
}
}
await gracefulShutdown()

View File

@@ -0,0 +1,153 @@
// @ts-check
import { ObjectId } from 'mongodb'
import { READ_PREFERENCE_SECONDARY } from '@overleaf/mongo-utils/batchedUpdate.js'
import { db, client } from '../lib/mongodb.js'
const projectsCollection = db.collection('projects')
// Enable caching for ObjectId.toString()
ObjectId.cacheHexString = true
// Configuration
const SAMPLE_SIZE_PER_ITERATION = process.argv[2]
? parseInt(process.argv[2], 10)
: 10000
const TARGET_ERROR_PERCENTAGE = process.argv[3]
? parseFloat(process.argv[3])
: 5.0
let gracefulShutdownInitiated = false
process.on('SIGINT', handleSignal)
process.on('SIGTERM', handleSignal)
function handleSignal() {
gracefulShutdownInitiated = true
console.warn('graceful shutdown initiated')
}
async function takeSample(sampleSize) {
const results = await projectsCollection
.aggregate(
[
{ $sample: { size: sampleSize } },
{
$match: { 'overleaf.backup.lastBackedUpVersion': { $exists: true } },
},
{
$count: 'total',
},
],
{ readPreference: READ_PREFERENCE_SECONDARY }
)
.toArray()
const count = results[0]?.total || 0
return { totalSampled: sampleSize, backedUp: count }
}
function calculateStatistics(
cumulativeSampled,
cumulativeBackedUp,
totalPopulation
) {
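// Note: Math.max(1, cumulativeBackedUp) keeps the proportion non-zero before
// any backed-up project has been sampled, so the margin of error cannot
// collapse to zero on an all-negative sample.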
const proportion = Math.max(1, cumulativeBackedUp) / cumulativeSampled
// Standard error with finite population correction
const fpc = Math.sqrt(
(totalPopulation - cumulativeSampled) / (totalPopulation - 1)
)
const stdError =
Math.sqrt((proportion * (1 - proportion)) / cumulativeSampled) * fpc
// 95% confidence interval is approximately ±1.96 standard errors
const marginOfError = 1.96 * stdError
return {
proportion,
percentage: (proportion * 100).toFixed(2),
marginOfError,
errorPercentage: (marginOfError * 100).toFixed(2),
lowerBound: ((proportion - marginOfError) * 100).toFixed(2),
upperBound: ((proportion + marginOfError) * 100).toFixed(2),
sampleSize: cumulativeSampled,
populationSize: totalPopulation,
}
}
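// Worked sketch of the estimate above (numbers invented for illustration):
// with N = 1,000,000 projects, n = 20,000 sampled and 19,000 backed up,
// p = 0.95, fpc = sqrt((1000000 - 20000) / 999999) ≈ 0.99,
// stdError = sqrt(0.95 * 0.05 / 20000) * 0.99 ≈ 0.00153, and
// marginOfError = 1.96 * 0.00153 ≈ 0.003, i.e. roughly ±0.3%.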
async function main() {
console.log('Date:', new Date().toISOString())
const totalCount = await projectsCollection.estimatedDocumentCount({
readPreference: READ_PREFERENCE_SECONDARY,
})
console.log(
`Total projects in collection (estimated): ${totalCount.toLocaleString()}`
)
console.log(`Target margin of error: ${TARGET_ERROR_PERCENTAGE}%`)
let cumulativeSampled = 0
let cumulativeBackedUp = 0
let currentError = Infinity
let iteration = 0
console.log('Iteration | Total Sampled | % Backed Up | Margin of Error')
console.log('----------|---------------|-------------|----------------')
while (currentError > TARGET_ERROR_PERCENTAGE) {
if (gracefulShutdownInitiated) {
console.log('Graceful shutdown initiated. Exiting sampling loop.')
break
}
iteration++
const { totalSampled, backedUp } = await takeSample(
SAMPLE_SIZE_PER_ITERATION
)
cumulativeSampled += totalSampled
cumulativeBackedUp += backedUp
const stats = calculateStatistics(
cumulativeSampled,
cumulativeBackedUp,
totalCount
)
currentError = parseFloat(stats.errorPercentage)
console.log(
`${iteration.toString().padStart(9)} | ` +
`${cumulativeSampled.toString().padStart(13)} | ` +
`${stats.percentage.padStart(10)}% | ` +
`\u00B1${stats.errorPercentage}%`
)
// Small delay between iterations
await new Promise(resolve => setTimeout(resolve, 100))
}
const finalStats = calculateStatistics(
cumulativeSampled,
cumulativeBackedUp,
totalCount
)
console.log(
`Projects sampled: ${cumulativeSampled.toLocaleString()} out of ${totalCount.toLocaleString()}`
)
console.log(
`Estimated percentage with lastBackedUpVersion: ${finalStats.percentage}%`
)
console.log(
`95% Confidence Interval: ${finalStats.lowerBound}% - ${finalStats.upperBound}%`
)
console.log(`Final Margin of Error: \u00B1${finalStats.errorPercentage}%`)
}
main()
.then(() => console.log('Done.'))
.catch(err => {
console.error('Error:', err)
process.exitCode = 1
})
.finally(() => {
client.close().catch(err => console.error('Error closing MongoDB:', err))
})

View File

@@ -0,0 +1,429 @@
import Queue from 'bull'
import config from 'config'
import commandLineArgs from 'command-line-args'
import logger from '@overleaf/logger'
import {
listPendingBackups,
listUninitializedBackups,
getBackupStatus,
} from '../lib/backup_store/index.js'
logger.initialize('backup-queue')
// Use the same redis config as backup_worker
const redisOptions = config.get('redis.queue')
// Create a Bull queue named 'backup'
const backupQueue = new Queue('backup', {
redis: redisOptions,
defaultJobOptions: {
removeOnComplete: true,
removeOnFail: true,
},
})
// Define command-line options
const optionDefinitions = [
{ name: 'clean', type: Boolean },
{ name: 'status', type: Boolean },
{
name: 'add',
type: String,
multiple: true,
description: 'Project IDs or date range in YYYY-MM-DD:YYYY-MM-DD format',
},
{ name: 'monitor', type: Boolean },
{
name: 'queue-pending',
type: Number,
description:
'Find projects with pending changes older than N seconds and add them to the queue',
},
{
name: 'show-pending',
type: Number,
description:
'Show count of pending projects older than N seconds without adding to queue',
},
{
name: 'limit',
type: Number,
description: 'Limit the number of jobs to be added',
},
{
name: 'interval',
type: Number,
description: 'Time in seconds to spread jobs over (default: 300)',
defaultValue: 300,
},
{
name: 'backoff-delay',
type: Number,
description:
'Backoff delay in milliseconds for failed jobs (default: 1000)',
defaultValue: 1000,
},
{
name: 'attempts',
type: Number,
description: 'Number of retry attempts for failed jobs (default: 3)',
defaultValue: 3,
},
{
name: 'warn-threshold',
type: Number,
description: 'Warn about any project exceeding this pending age',
defaultValue: 2 * 3600, // 2 hours
},
{
name: 'verbose',
alias: 'v',
type: Boolean,
description: 'Show detailed information when used with --show-pending',
},
]
// Parse command line arguments
const options = commandLineArgs(optionDefinitions)
const WARN_THRESHOLD = options['warn-threshold']
// Helper to validate date format
function isValidDateFormat(dateStr) {
return /^\d{4}-\d{2}-\d{2}$/.test(dateStr)
}
// Helper to validate the pending time parameter
function validatePendingTime(option, value) {
if (typeof value !== 'number' || value <= 0) {
console.error(
`Error: --${option} requires a positive numeric TIME argument in seconds`
)
console.error(`Example: --${option} 3600`)
process.exit(1)
}
return value
}
// Helper to format the pending time display
function formatPendingTime(timestamp) {
const now = new Date()
const diffMs = now - timestamp
const seconds = Math.floor(diffMs / 1000)
return `${timestamp.toISOString()} (${seconds} seconds ago)`
}
// Helper to add a job to the queue, checking for duplicates
async function addJobWithCheck(queue, data, options) {
const jobId = options.jobId
// Check if the job already exists
const existingJob = await queue.getJob(jobId)
if (existingJob) {
return { job: existingJob, added: false }
} else {
const job = await queue.add(data, options)
return { job, added: true }
}
}
// Setup queue event listeners
function setupMonitoring() {
console.log('Starting queue monitoring. Press Ctrl+C to exit.')
backupQueue.on('global:error', error => {
logger.info({ error }, 'Queue error')
})
backupQueue.on('global:waiting', jobId => {
logger.info({ jobId }, 'job is waiting')
})
backupQueue.on('global:active', jobId => {
logger.info({ jobId }, 'job is now active')
})
backupQueue.on('global:stalled', jobId => {
logger.info({ jobId }, 'job has stalled')
})
backupQueue.on('global:progress', (jobId, progress) => {
logger.info({ jobId, progress }, 'job progress')
})
backupQueue.on('global:completed', (jobId, result) => {
logger.info({ jobId, result }, 'job completed')
})
backupQueue.on('global:failed', (jobId, err) => {
logger.info({ jobId, err }, 'job failed')
})
backupQueue.on('global:paused', () => {
logger.info({}, 'Queue paused')
})
backupQueue.on('global:resumed', () => {
logger.info({}, 'Queue resumed')
})
backupQueue.on('global:cleaned', (jobs, type) => {
logger.info({ jobsCount: jobs.length, type }, 'Jobs cleaned')
})
backupQueue.on('global:drained', () => {
logger.info({}, 'Queue drained')
})
backupQueue.on('global:removed', jobId => {
logger.info({ jobId }, 'Job removed')
})
}
async function addDateRangeJob(input) {
const [startDate, endDate] = input.split(':')
if (!isValidDateFormat(startDate) || !isValidDateFormat(endDate)) {
console.error(
`Invalid date format for "${input}". Use YYYY-MM-DD:YYYY-MM-DD`
)
return
}
const jobId = `backup-${startDate}-to-${endDate}`
const { job, added } = await addJobWithCheck(
backupQueue,
{ startDate, endDate },
{ jobId }
)
console.log(
`${added ? 'Added' : 'Already exists'}: date range backup job: ${startDate} to ${endDate}, job ID: ${job.id}`
)
}
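// Illustrative invocations (the script path is an assumption; the flags are
// the ones defined in optionDefinitions above):
//
//   node storage/scripts/backup_queue.mjs --status
//   node storage/scripts/backup_queue.mjs --add 2024-01-01:2024-01-31
//   node storage/scripts/backup_queue.mjs --show-pending 3600 --verbose
//   node storage/scripts/backup_queue.mjs --queue-pending 3600 --limit 1000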
// Helper to list pending and uninitialized backups
// This function combines the two cursors into a single generator
// to yield projects from both lists
async function* pendingCursor(timeIntervalMs, limit) {
for await (const project of listPendingBackups(timeIntervalMs, limit)) {
yield project
}
for await (const project of listUninitializedBackups(timeIntervalMs, limit)) {
yield project
}
}
// Process pending projects with changes older than the specified seconds
async function processPendingProjects(
age,
showOnly,
limit,
verbose,
jobInterval,
jobOpts = {}
) {
const timeIntervalMs = age * 1000
console.log(
`Finding projects with pending changes older than ${age} seconds${showOnly ? ' (count only)' : ''}`
)
let count = 0
let addedCount = 0
let existingCount = 0
// Pass the limit directly to MongoDB query for better performance
const changeTimes = []
for await (const project of pendingCursor(timeIntervalMs, limit)) {
const projectId = project._id.toHexString()
const pendingAt =
project.overleaf?.backup?.pendingChangeAt || project._id.getTimestamp()
if (pendingAt) {
changeTimes.push(pendingAt)
const pendingAge = Math.floor((Date.now() - pendingAt.getTime()) / 1000)
if (pendingAge > WARN_THRESHOLD) {
try {
const backupStatus = await getBackupStatus(projectId)
logger.warn(
{
projectId,
pendingAt,
pendingAge,
backupStatus,
warnThreshold: WARN_THRESHOLD,
},
`pending change exceeds rpo warning threshold`
)
} catch (err) {
logger.error(
{ projectId, pendingAt, pendingAge },
'Error getting backup status'
)
throw err
}
}
}
if (showOnly && verbose) {
console.log(
`Project: ${projectId} (pending since: ${formatPendingTime(pendingAt)})`
)
} else if (!showOnly) {
const delay = Math.floor(Math.random() * jobInterval * 1000) // add random delay to avoid all jobs running simultaneously
const { job, added } = await addJobWithCheck(
backupQueue,
{ projectId, pendingChangeAt: pendingAt.getTime() },
{ ...jobOpts, delay, jobId: projectId }
)
if (added) {
if (verbose) {
console.log(
`Added job for project: ${projectId}, job ID: ${job.id} (pending since: ${formatPendingTime(pendingAt)})`
)
}
addedCount++
} else {
if (verbose) {
console.log(
`Job already exists for project: ${projectId}, job ID: ${job.id} (pending since: ${formatPendingTime(pendingAt)})`
)
}
existingCount++
}
}
count++
if (count % 1000 === 0) {
console.log(
`Processed ${count} projects`,
showOnly ? '' : `(${addedCount} added, ${existingCount} existing)`
)
}
}
// Set oldestChange to undefined if there are no changes
const oldestChange =
changeTimes.length > 0
? changeTimes.reduce((min, time) => (time < min ? time : min))
: undefined
if (showOnly) {
console.log(
`Found ${count} projects with pending changes (not added to queue)`
)
} else {
console.log(`Found ${count} projects with pending changes:`)
console.log(` ${addedCount} jobs added to queue`)
console.log(` ${existingCount} jobs already existed in queue`)
if (oldestChange) {
console.log(` Oldest pending change: ${formatPendingTime(oldestChange)}`)
}
}
}
// Main execution block
async function run() {
const optionCount = [
options.clean,
options.status,
options.add,
options.monitor,
options['queue-pending'] !== undefined,
options['show-pending'] !== undefined,
].filter(Boolean).length
if (optionCount > 1) {
console.error('Only one option can be specified')
process.exit(1)
}
if (options.clean) {
const beforeCounts = await backupQueue.getJobCounts()
console.log('Current queue state:', JSON.stringify(beforeCounts))
console.log('Cleaning completed and failed jobs...')
await backupQueue.clean(1, 'completed')
await backupQueue.clean(1, 'failed')
const afterCounts = await backupQueue.getJobCounts()
console.log('Current queue state:', JSON.stringify(afterCounts))
console.log('Queue cleaned successfully')
} else if (options.status) {
const counts = await backupQueue.getJobCounts()
console.log('Current queue state:', JSON.stringify(counts))
} else if (options.add) {
const inputs = Array.isArray(options.add) ? options.add : [options.add]
for (const input of inputs) {
if (input.includes(':')) {
// Handle date range format
await addDateRangeJob(input)
} else {
// Handle project ID format
const { job, added } = await addJobWithCheck(
backupQueue,
{ projectId: input },
{ jobId: input }
)
console.log(
`${added ? 'Added' : 'Already exists'}: job for project: ${input}, job ID: ${job.id}`
)
}
}
} else if (options.monitor) {
setupMonitoring()
} else if (options['queue-pending'] !== undefined) {
const age = validatePendingTime('queue-pending', options['queue-pending'])
await processPendingProjects(
age,
false,
options.limit,
options.verbose,
options.interval,
{
attempts: options.attempts,
backoff: {
type: 'exponential',
delay: options['backoff-delay'],
},
}
)
} else if (options['show-pending'] !== undefined) {
const age = validatePendingTime('show-pending', options['show-pending'])
await processPendingProjects(age, true, options.limit, options.verbose)
} else {
console.log('Usage:')
console.log(' --clean Clean up completed and failed jobs')
console.log(' --status Show current job counts')
console.log(' --add [projectId] Add a job for the specified projectId')
console.log(
' --add [YYYY-MM-DD:YYYY-MM-DD] Add a job for the specified date range'
)
console.log(' --monitor Monitor queue events')
console.log(
' --queue-pending TIME Find projects with changes older than TIME seconds and add them to the queue'
)
console.log(
' --show-pending TIME Show count of pending projects older than TIME seconds'
)
console.log(' --limit N Limit the number of jobs to be added')
console.log(
' --interval TIME Time interval in seconds to spread jobs over'
)
console.log(
' --backoff-delay TIME Backoff delay in milliseconds for failed jobs (default: 1000)'
)
console.log(
' --attempts N Number of retry attempts for failed jobs (default: 3)'
)
console.log(
' --verbose, -v Show detailed information when used with --show-pending'
)
}
}
// Run and handle errors
run()
.catch(err => {
console.error('Error:', err)
process.exit(1)
})
.then(result => {
// Only exit if not in monitor mode
if (!options.monitor) {
process.exit(0)
}
})

View File

@@ -0,0 +1,144 @@
import Queue from 'bull'
import logger from '@overleaf/logger'
import config from 'config'
import metrics from '@overleaf/metrics'
import {
backupProject,
initializeProjects,
configureBackup,
} from './backup.mjs'
const CONCURRENCY = 15
const WARN_THRESHOLD = 2 * 60 * 60 * 1000 // warn if projects are older than this
const redisOptions = config.get('redis.queue')
const JOB_TIME_BUCKETS = [10, 100, 500, 1000, 5000, 10000, 30000, 60000] // milliseconds
const LAG_TIME_BUCKETS_HRS = [
0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2, 3, 4, 5, 6,
] // hours
// Configure backup settings to match worker concurrency
configureBackup({ concurrency: 50, useSecondary: true })
// Create a Bull queue named 'backup'
const backupQueue = new Queue('backup', {
redis: redisOptions,
settings: {
lockDuration: 15 * 60 * 1000, // 15 minutes
lockRenewTime: 60 * 1000, // 1 minute
maxStalledCount: 0, // mark stalled jobs as failed
},
})
// Log queue events
backupQueue.on('active', job => {
logger.debug({ job }, 'job is now active')
})
backupQueue.on('completed', (job, result) => {
metrics.inc('backup_worker_job', 1, { status: 'completed' })
logger.debug({ job, result }, 'job completed')
})
backupQueue.on('failed', (job, err) => {
metrics.inc('backup_worker_job', 1, { status: 'failed' })
logger.error({ job, err }, 'job failed')
})
backupQueue.on('waiting', jobId => {
logger.debug({ jobId }, 'job is waiting')
})
backupQueue.on('error', error => {
logger.error({ error }, 'queue error')
})
backupQueue.on('stalled', job => {
logger.error({ job }, 'job has stalled')
})
backupQueue.on('lock-extension-failed', (job, err) => {
logger.error({ job, err }, 'lock extension failed')
})
backupQueue.on('paused', () => {
logger.info('queue paused')
})
backupQueue.on('resumed', () => {
logger.info('queue resumed')
})
// Process jobs
backupQueue.process(CONCURRENCY, async job => {
const { projectId, startDate, endDate } = job.data
if (projectId) {
return await runBackup(projectId, job.data, job)
} else if (startDate && endDate) {
return await runInit(startDate, endDate)
} else {
throw new Error('invalid job data')
}
})
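// Illustrative job payloads (the shapes follow the destructuring above; the
// values are invented):
//
//   { projectId: '5f0c9d8e2a1b3c4d5e6f7a8b', pendingChangeAt: 1735689600000 }
//   { startDate: '2024-01-01', endDate: '2024-01-31' }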
async function runBackup(projectId, data, job) {
const { pendingChangeAt } = data
// record the time it takes to run the backup job
const timer = new metrics.Timer(
'backup_worker_job_duration',
1,
{},
JOB_TIME_BUCKETS
)
const pendingAge = Date.now() - pendingChangeAt
if (pendingAge > WARN_THRESHOLD) {
logger.warn(
{ projectId, pendingAge, job },
'project has been pending for a long time'
)
}
try {
logger.debug({ projectId }, 'processing backup for project')
await backupProject(projectId, {})
metrics.inc('backup_worker_project', 1, {
status: 'success',
})
timer.done()
// record the replication lag (time from change to backup)
if (pendingChangeAt) {
metrics.histogram(
'backup_worker_replication_lag_in_hours',
(Date.now() - pendingChangeAt) / (3600 * 1000),
LAG_TIME_BUCKETS_HRS
)
}
return `backup completed ${projectId}`
} catch (err) {
metrics.inc('backup_worker_project', 1, { status: 'failed' })
logger.error({ projectId, err }, 'backup failed')
throw err // Re-throw to mark job as failed
}
}
async function runInit(startDate, endDate) {
try {
logger.info({ startDate, endDate }, 'initializing projects')
await initializeProjects({ 'start-date': startDate, 'end-date': endDate })
return `initialization completed ${startDate} - ${endDate}`
} catch (err) {
logger.error({ startDate, endDate, err }, 'initialization failed')
throw err
}
}
export async function drainQueue() {
logger.info({ queue: backupQueue.name }, 'pausing queue')
await backupQueue.pause(true) // pause this worker and wait for jobs to finish
logger.info({ queue: backupQueue.name }, 'closing queue')
await backupQueue.close()
}
export async function healthCheck() {
const count = await backupQueue.count()
metrics.gauge('backup_worker_queue_length', count)
}

View File

@@ -0,0 +1,69 @@
/**
* A script to export the global blobs from mongo to a CSV file.
*
* node storage/scripts/export_global_blobs.mjs --output global_blobs.csv
*
* The output CSV has the following format:
*
* hash,path,byteLength,stringLength,demoted
*
* hash: the hash of the blob
* path: the path of the blob in the blob store
* byteLength: the byte length of the blob, or empty if unknown
* stringLength: the string length of the blob, or empty if unknown
* demoted: true if the blob has been demoted to a reference, false otherwise
*/
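// Illustrative output rows (the hashes are invented; an empty field means the
// corresponding length is unknown):
//
//   hash,path,byteLength,stringLength,demoted
//   0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33,0b/eec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33,42,42,false
//   da39a3ee5e6b4b0d3255bfef95601890afd80709,da/39a3ee5e6b4b0d3255bfef95601890afd80709,120,,true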
// @ts-check
import { ObjectId } from 'mongodb'
import { GLOBAL_BLOBS, loadGlobalBlobs } from '../lib/blob_store/index.js'
import { client } from '../lib/mongodb.js'
import commandLineArgs from 'command-line-args'
import fs from 'node:fs'
// Enable caching for ObjectId.toString()
ObjectId.cacheHexString = true
function parseArgs() {
const args = commandLineArgs([
{
name: 'output',
type: String,
alias: 'o',
},
])
const OUTPUT_STREAM = fs.createWriteStream(args['output'], { flags: 'wx' })
return {
OUTPUT_STREAM,
}
}
const { OUTPUT_STREAM } = parseArgs()
async function main() {
await loadGlobalBlobs()
OUTPUT_STREAM.write('hash,path,byteLength,stringLength,demoted\n')
for (const [hash, { blob, demoted }] of GLOBAL_BLOBS) {
const { hash: blobHash, byteLength, stringLength } = blob
if (blobHash !== hash) {
throw new Error(`hash mismatch: ${hash} !== ${blobHash}`)
}
const path = blobHash.slice(0, 2) + '/' + blobHash.slice(2)
const byteLengthStr = byteLength === null ? '' : byteLength
const stringLengthStr = stringLength === null ? '' : stringLength
OUTPUT_STREAM.write(
`${hash},${path},${byteLengthStr},${stringLengthStr},${demoted}\n`
)
}
}
main()
.then(() => console.log('Done.'))
.catch(err => {
console.error('Error:', err)
process.exitCode = 1
})
.finally(() => {
client.close().catch(err => console.error('Error closing MongoDB:', err))
})

View File

@@ -0,0 +1,51 @@
// @ts-check
import { backedUpBlobs } from '../lib/mongodb.js'
import { mongoId } from '../lib/assert.js'
import { ObjectId } from 'mongodb'
import commandLineArgs from 'command-line-args'
const STATS = {
total: 0,
replaced: 0,
skipped: 0,
}
const config = commandLineArgs([
{ name: 'commit', type: Boolean, defaultValue: false },
])
async function processRecord(record) {
STATS.total++
try {
mongoId(record._id)
const newId = new ObjectId(record._id)
if (config.commit) {
await backedUpBlobs.updateOne(
{ _id: newId },
{
$addToSet: { blobs: { $each: record.blobs } },
},
{ upsert: true }
)
await backedUpBlobs.deleteOne({ _id: record._id })
}
STATS.replaced++
} catch (error) {
console.log(error)
STATS.skipped++
}
}
const cursor = backedUpBlobs
.find({ _id: { $type: 'string' } })
.project({ _id: 1, blobs: 1 })
while (await cursor.hasNext()) {
const record = await cursor.next()
await processRecord(record)
}
console.log(
`${!config.commit ? 'DRY RUN: ' : ''}${STATS.total} records, ${STATS.replaced} replaced, ${STATS.skipped} skipped`
)
process.exit()

View File

@@ -0,0 +1,3 @@
UPDATE blobs
SET global = TRUE
WHERE hash_bytes IN (SELECT hash_bytes FROM global_blob_hashes);

View File

@@ -0,0 +1,16 @@
CREATE TABLE global_blobs (
hash_bytes bytea NOT NULL,
byte_length integer NOT NULL,
string_length integer,
global boolean,
CONSTRAINT global_blobs_pkey PRIMARY KEY (hash_bytes),
CONSTRAINT global_blobs_byte_length_non_negative
CHECK (byte_length >= 0),
CONSTRAINT global_blobs_string_length_non_negative
CHECK (string_length IS NULL OR string_length >= 0)
);
INSERT INTO global_blobs (hash_bytes, byte_length, string_length, global)
SELECT hash_bytes, byte_length, string_length, true
FROM blobs
WHERE hash_bytes IN (SELECT hash_bytes FROM global_blob_hashes);

View File

@@ -0,0 +1,22 @@
BEGIN;
ALTER TABLE blobs RENAME TO old_blobs;
ALTER TABLE global_blobs RENAME TO blobs;
ALTER TABLE old_blobs
RENAME CONSTRAINT blobs_pkey TO old_blobs_pkey;
ALTER TABLE old_blobs
RENAME CONSTRAINT blobs_byte_length_non_negative
TO old_blobs_byte_length_non_negative;
ALTER TABLE old_blobs
RENAME CONSTRAINT blobs_string_length_non_negative
TO old_blobs_string_length_non_negative;
ALTER TABLE blobs
RENAME CONSTRAINT global_blobs_pkey TO blobs_pkey;
ALTER TABLE blobs
RENAME CONSTRAINT global_blobs_byte_length_non_negative
TO blobs_byte_length_non_negative;
ALTER TABLE blobs
RENAME CONSTRAINT global_blobs_string_length_non_negative
TO blobs_string_length_non_negative;
COMMIT;

View File

@@ -0,0 +1,9 @@
Scripts in this directory were used when we cleaned up the global blobs table,
ensuring that it only contained global blobs. The scripts are meant to be run in this order:
* `01-create-blob-hashes-table.sql`
* `02-set-global-flag.sql`
* `03-create-global-blobs-table.sql`
* `04-swap-global-blob-tables.sql`
The `rollback.sql` script can be run to reverse the effect of `04-swap-global-blob-tables.sql`.
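For example, assuming a `psql` session connected to the history database, the scripts can be applied in order with `psql "$DATABASE_URL" -f 01-create-blob-hashes-table.sql` and so on; the connection details here are an assumption, not part of the original notes.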

View File

@@ -0,0 +1,22 @@
BEGIN;
ALTER TABLE blobs RENAME TO global_blobs;
ALTER TABLE old_blobs RENAME TO blobs;
ALTER TABLE global_blobs
RENAME CONSTRAINT blobs_pkey TO global_blobs_pkey;
ALTER TABLE global_blobs
RENAME CONSTRAINT blobs_byte_length_non_negative
TO global_blobs_byte_length_non_negative;
ALTER TABLE global_blobs
RENAME CONSTRAINT blobs_string_length_non_negative
TO global_blobs_string_length_non_negative;
ALTER TABLE blobs
RENAME CONSTRAINT old_blobs_pkey TO blobs_pkey;
ALTER TABLE blobs
RENAME CONSTRAINT old_blobs_byte_length_non_negative
TO blobs_byte_length_non_negative;
ALTER TABLE blobs
RENAME CONSTRAINT old_blobs_string_length_non_negative
TO blobs_string_length_non_negative;
COMMIT;

View File

@@ -0,0 +1,379 @@
const fsPromises = require('node:fs/promises')
const { ObjectId } = require('mongodb')
const BPromise = require('bluebird')
const logger = require('@overleaf/logger')
const Settings = require('@overleaf/settings')
const rclient = require('@overleaf/redis-wrapper').createClient(
Settings.redis.documentupdater
)
const mongodb = require('../lib/mongodb')
const { chunkStore } = require('..')
const Events = require('node:events')
// Silence warning.
Events.setMaxListeners(20)
const BATCH_SIZE = 1000
const OPTIONS = {
concurrency: parseInt(process.env.DOC_VERSION_RECOVERY_CONCURRENCY, 10) || 20,
force: process.env.DOC_VERSION_RECOVERY_FORCE === 'true',
'skip-history-failures':
process.env.DOC_VERSION_RECOVERY_SKIP_HISTORY_FAILURES === 'true',
'resyncs-needed-file': process.env.DOC_VERSION_RECOVERY_RESYNCS_NEEDED_FILE,
}
const db = {
deletedProjects: mongodb.db.collection('deletedProjects'),
docs: mongodb.db.collection('docs'),
migrations: mongodb.db.collection('migrations'),
projects: mongodb.db.collection('projects'),
}
const BAD_MIGRATION_NAME =
'20231219081700_move_doc_versions_from_docops_to_docs'
const RECOVERY_FILES_502 = [
'/var/lib/overleaf/data/history/doc-version-recovery-resyncs.log',
'/var/lib/overleaf/data/history/doc-version-recovery-resyncs.log.done',
]
let loggingChain = Promise.resolve()
const projectIdsThatNeedResyncing = []
const unflushedDocIds = new Set()
async function flushLogQueue() {
const logPath = OPTIONS['resyncs-needed-file']
loggingChain = loggingChain.then(async () => {
const batch = projectIdsThatNeedResyncing.splice(0)
if (batch.length === 0) return
try {
await fsPromises.appendFile(logPath, batch.join('\n') + '\n')
} catch (err) {
projectIdsThatNeedResyncing.push(...batch)
logger.err({ err, logPath, batch }, 'Failed to write to log file')
}
})
await loggingChain
}
async function recordProjectNeedsResync(projectId) {
if (OPTIONS['resyncs-needed-file']) {
projectIdsThatNeedResyncing.push(projectId)
await flushLogQueue()
} else {
console.log(`Project ${projectId} needs a hard resync.`)
}
}
async function main() {
const recovery502Ran = await did502RecoveryRun()
await getUnflushedDocIds()
const badMigration = await db.migrations.findOne({ name: BAD_MIGRATION_NAME })
if (unflushedDocIds.size > 0 && !recovery502Ran && badMigration != null) {
// Tell customers that they need to flush
console.log(`
--------------------------------------------------------------------
Detected unflushed changes while recovering doc versions.
Please go back to version 5.0.1 and follow the recovery procedure
for flushing document updates:
https://github.com/overleaf/overleaf/wiki/Doc-version-recovery
--------------------------------------------------------------------`)
process.exit(1)
}
if (OPTIONS.force || recovery502Ran || badMigration != null) {
console.warn('Need to recover doc versions. This will take a while.')
await runRecovery()
await db.migrations.deleteOne({ name: BAD_MIGRATION_NAME })
await delete502RecoveryFiles()
}
console.log('Done.')
}
async function did502RecoveryRun() {
for (const file of RECOVERY_FILES_502) {
try {
await fsPromises.stat(file)
return true
} catch (err) {
// file doesn't exist. continue
}
}
return false
}
async function delete502RecoveryFiles() {
for (const file of RECOVERY_FILES_502) {
try {
await fsPromises.rename(file, file.replace('.log', '-5.0.2.log'))
} catch (err) {
// file doesn't exist. continue
}
}
}
async function runRecovery() {
let batch = []
const summary = {
ignored: 0,
skipped: 0,
deletedUpdatedMongo: 0,
deletedUpdatedRedis: 0,
deletedUpdatedBoth: 0,
deletedIgnored: 0,
updatedMongo: 0,
updatedRedis: 0,
updatedBoth: 0,
}
const processBatchAndLogProgress = async () => {
try {
await BPromise.map(batch, project => processProject(project, summary), {
concurrency: OPTIONS.concurrency,
})
} finally {
console.log(`${summary.updatedRedis} projects updated in Redis`)
console.log(`${summary.updatedMongo} projects updated in Mongo`)
console.log(
`${summary.updatedBoth} projects updated in both Mongo and Redis`
)
console.log(`${summary.ignored} projects had good versions`)
console.log(
`${summary.deletedUpdatedMongo} deleted projects updated in Mongo`
)
console.log(
`${summary.deletedUpdatedRedis} deleted projects updated in Redis`
)
console.log(
`${summary.deletedUpdatedBoth} deleted projects updated in both Mongo and Redis`
)
console.log(
`${summary.deletedIgnored} deleted projects had good versions`
)
console.log(`${summary.skipped} projects skipped`)
}
batch = []
}
await printDBStats()
await initResyncsNeededFile()
for await (const project of getProjects()) {
batch.push(project)
if (batch.length >= BATCH_SIZE) {
await processBatchAndLogProgress()
}
}
for await (const deletedProject of getDeletedProjects()) {
const project = deletedProject.project
project.isDeleted = true
batch.push(project)
if (batch.length >= BATCH_SIZE) {
await processBatchAndLogProgress()
}
}
if (batch.length > 0) {
await processBatchAndLogProgress()
}
await backfillMissingVersions()
}
async function getUnflushedDocIds() {
const batchSize = 1000
let cursor = '0'
do {
const [newCursor, keys] = await rclient.scan(
cursor,
'MATCH',
Settings.redis.documentupdater.key_schema.docVersion({ doc_id: '*' }),
'COUNT',
batchSize
)
for (const key of keys) {
unflushedDocIds.add(key.slice('DocVersion:'.length))
}
cursor = newCursor
} while (cursor !== '0')
}
async function printDBStats() {
const projects = await db.projects.estimatedDocumentCount()
const deletedProjects = await db.deletedProjects.countDocuments()
const docs = await db.docs.estimatedDocumentCount()
console.log(
`Need to check ${projects} projects and up to ${deletedProjects} deleted projects with a total of ${docs} docs.`
)
}
async function initResyncsNeededFile() {
const logPath = OPTIONS['resyncs-needed-file']
if (logPath) {
await fsPromises.writeFile(logPath, '')
await fsPromises.rm(`${logPath}.done`, { force: true })
}
}
function getProjects() {
return db.projects.find({}, { projection: { _id: 1, overleaf: 1 } })
}
function getDeletedProjects() {
return db.deletedProjects.find(
{ 'project.overleaf.history.id': { $exists: true } },
{ projection: { 'project._id': 1, 'project.overleaf': 1 } }
)
}
async function processProject(project, summary) {
const projectId = project._id.toString()
let updatedMongo = false
let updatedRedis = false
try {
const historyDocVersions = await getHistoryDocVersions(project)
for (const { docId, version } of historyDocVersions) {
const update = await fixDocVersion(docId, version)
if (update != null) {
if (update.in === 'mongo') {
updatedMongo = true
} else if (update.in === 'redis') {
updatedRedis = true
}
}
}
if (project.isDeleted) {
if (updatedMongo && updatedRedis) {
summary.deletedUpdatedBoth += 1
} else if (updatedMongo) {
summary.deletedUpdatedMongo += 1
} else if (updatedRedis) {
summary.deletedUpdatedRedis += 1
} else {
summary.deletedIgnored += 1
}
} else {
await recordProjectNeedsResync(projectId)
if (updatedMongo && updatedRedis) {
summary.updatedBoth += 1
} else if (updatedMongo) {
summary.updatedMongo += 1
} else if (updatedRedis) {
summary.updatedRedis += 1
} else {
summary.ignored += 1
}
}
} catch (err) {
logger.error({ err, projectId }, 'Failed to process project')
if (OPTIONS['skip-history-failures']) {
summary.skipped += 1
} else {
throw err
}
}
}
async function getHistoryDocVersions(project) {
const historyId = project.overleaf.history.id
const chunk = await chunkStore.loadLatest(historyId)
if (chunk == null) {
return []
}
const snapshot = chunk.getSnapshot()
const changes = chunk.getChanges()
snapshot.applyAll(changes)
const v2DocVersions = snapshot.getV2DocVersions()
if (v2DocVersions == null) {
return []
}
return Object.entries(v2DocVersions.data).map(([docId, versionInfo]) => ({
docId,
version: versionInfo.v,
}))
}
async function fixDocVersion(docId, historyVersion) {
const redisVersion = await getRedisDocVersion(docId)
if (redisVersion != null && historyVersion >= redisVersion) {
await setRedisDocVersion(docId, historyVersion + 1)
return {
in: 'redis',
previousVersion: redisVersion,
newVersion: historyVersion + 1,
}
} else {
const docBeforeUpdate = await db.docs.findOneAndUpdate(
{
_id: new ObjectId(docId),
$or: [
{ version: { $lte: historyVersion } },
{ version: { $exists: false } },
],
},
{ $set: { version: historyVersion + 1 } },
{ projection: { _id: 1, version: 1 } }
)
if (docBeforeUpdate != null) {
return {
in: 'mongo',
previousVersion: docBeforeUpdate.version,
newVersion: historyVersion + 1,
}
} else {
return null
}
}
}
async function getRedisDocVersion(docId) {
if (!unflushedDocIds.has(docId)) {
return null
}
const result = await rclient.get(
Settings.redis.documentupdater.key_schema.docVersion({ doc_id: docId })
)
if (result == null) {
return null
}
return parseInt(result, 10)
}
async function setRedisDocVersion(docId, version) {
const multi = rclient.multi()
multi.set(
Settings.redis.documentupdater.key_schema.docVersion({ doc_id: docId }),
version
)
multi.set(`UnflushedTime:{${docId}}`, Date.now(), 'NX')
await multi.exec()
}
/**
* Set all remaining versions to 0
*/
async function backfillMissingVersions() {
console.log('Defaulting version to 0 for remaining docs.')
await db.docs.updateMany(
{ version: { $exists: false } },
{ $set: { version: 0 } }
)
}
main()
.finally(async () => {
console.log('Flushing log queue.')
await flushLogQueue()
})
.then(() => {
process.exit(0)
})
.catch(err => {
console.error(err)
process.exit(1)
})

View File

@@ -0,0 +1,255 @@
/**
* Try to recover a zip of the latest version of a project using only data in
* GCS, where this data may have been (recently) hard deleted (i.e. may exist
* wholly or in part as non-current versions). This should be able to
* retrieve the latest content of a project up to 180 days after it was
* deleted.
*
* Usage:
* node recover_zip.js [--verbose] <HISTORY_ID> <HISTORY_ID> ...
*
* Output:
* Signed URL(s) for the uploaded zip files. Note that these are valid for
* only 24h, to match the lifecycle rule on the zip bucket.
*/
const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const util = require('node:util')
// Something is registering 11 listeners, over the limit of 10, which generates
// a lot of warning noise.
require('node:events').EventEmitter.defaultMaxListeners = 11
const config = require('config')
// We depend on this via object-persistor.
// eslint-disable-next-line import/no-extraneous-dependencies
const { Storage } = require('@google-cloud/storage')
const isValidUtf8 = require('utf-8-validate')
const core = require('overleaf-editor-core')
const projectKey = require('../lib/project_key')
const streams = require('../lib/streams')
const ProjectArchive = require('../lib/project_archive')
const {
values: { verbose: VERBOSE },
positionals: HISTORY_IDS,
} = util.parseArgs({
options: {
verbose: {
type: 'boolean',
default: false,
},
},
allowPositionals: true,
})
if (HISTORY_IDS.length === 0) {
console.error('no history IDs; see usage')
process.exit(1)
}
async function listDeletedChunks(historyId) {
const bucketName = config.get('chunkStore.bucket')
const storage = new Storage()
const [files] = await storage.bucket(bucketName).getFiles({
prefix: projectKey.format(historyId),
versions: true,
})
return files
}
async function findLatestChunk(historyId) {
const files = await listDeletedChunks(historyId)
if (files.length === 0) return null
files.sort((a, b) => {
if (a.name < b.name) return -1
if (a.name > b.name) return 1
return 0
})
return files[files.length - 1]
}
async function downloadLatestChunk(tmp, historyId) {
const latestChunkFile = await findLatestChunk(historyId)
if (!latestChunkFile) throw new Error('no chunk found to recover')
const destination = path.join(tmp, 'latest.json')
await latestChunkFile.download({ destination })
return destination
}
async function loadHistory(historyPathname) {
const data = await fs.promises.readFile(historyPathname)
const rawHistory = JSON.parse(data)
return core.History.fromRaw(rawHistory)
}
async function loadChunk(historyPathname, blobStore) {
const history = await loadHistory(historyPathname)
const blobHashes = new Set()
history.findBlobHashes(blobHashes)
await blobStore.fetchBlobs(blobHashes)
await history.loadFiles('lazy', blobStore)
return new core.Chunk(history, 0)
}
// TODO: it would be nice to export / expose this from BlobStore;
// currently this is a copy of the method there.
async function getStringLengthOfFile(byteLength, pathname) {
// We have to read the file into memory to get its UTF-8 length, so don't
// bother for files that are too large for us to edit anyway.
if (byteLength > core.Blob.MAX_EDITABLE_BYTE_LENGTH_BOUND) {
return null
}
// We need to check if the file contains non-BMP or null characters
let data = await fs.promises.readFile(pathname)
if (!isValidUtf8(data)) return null
data = data.toString()
if (data.length > core.TextOperation.MAX_STRING_LENGTH) return null
if (core.util.containsNonBmpChars(data)) return null
if (data.indexOf('\x00') !== -1) return null
return data.length
}
class RecoveryBlobStore {
constructor(historyId, tmp) {
this.historyId = historyId
this.tmp = tmp
this.blobs = new Map()
}
async fetchBlobs(blobHashes) {
for await (const blobHash of blobHashes) {
await this.fetchBlob(blobHash)
}
}
async fetchBlob(hash) {
if (this.blobs.has(hash)) return
if (VERBOSE) console.log('fetching blob', hash)
const bucketName = config.get('blobStore.projectBucket')
const storage = new Storage()
const [files] = await storage.bucket(bucketName).getFiles({
prefix: this.makeProjectBlobKey(hash),
versions: true,
})
const destination = this.getBlobPathname(hash)
if (files.length === 0) {
await this.fetchGlobalBlob(hash, destination)
} else if (files.length === 1) {
await files[0].download({ destination })
} else {
throw new Error('Multiple versions of blob ' + hash)
}
this.blobs.set(hash, await this.makeBlob(hash, destination))
}
async fetchGlobalBlob(hash, destination) {
const bucketName = config.get('blobStore.globalBucket')
const storage = new Storage()
const file = storage.bucket(bucketName).file(this.makeGlobalBlobKey(hash))
await file.download({ destination })
}
async makeBlob(hash, pathname) {
const stat = await fs.promises.stat(pathname)
const byteLength = stat.size
const stringLength = await getStringLengthOfFile(byteLength, pathname)
return new core.Blob(hash, byteLength, stringLength)
}
async getString(hash) {
const stream = await this.getStream(hash)
const buffer = await streams.readStreamToBuffer(stream)
return buffer.toString()
}
async getStream(hash) {
return fs.createReadStream(this.getBlobPathname(hash))
}
async getBlob(hash) {
return this.blobs.get(hash)
}
getBlobPathname(hash) {
return path.join(this.tmp, hash)
}
makeGlobalBlobKey(hash) {
return `${hash.slice(0, 2)}/${hash.slice(2, 4)}/${hash.slice(4)}`
}
makeProjectBlobKey(hash) {
return `${projectKey.format(this.historyId)}/${hash.slice(
0,
2
)}/${hash.slice(2)}`
}
}
async function uploadZip(historyId, zipPathname) {
const bucketName = config.get('zipStore.bucket')
const deadline = 24 * 3600 * 1000 // lifecycle limit on the zips bucket
const storage = new Storage()
const destination = `${historyId}-recovered.zip`
await storage.bucket(bucketName).upload(zipPathname, { destination })
const signedUrls = await storage
.bucket(bucketName)
.file(destination)
.getSignedUrl({
version: 'v4',
action: 'read',
expires: Date.now() + deadline,
})
return signedUrls[0]
}
async function restoreProject(historyId) {
const tmp = await fs.promises.mkdtemp(
path.join(os.tmpdir(), historyId.toString())
)
if (VERBOSE) console.log('recovering', historyId, 'in', tmp)
const latestJsonPathname = await downloadLatestChunk(tmp, historyId)
const blobStore = new RecoveryBlobStore(historyId, tmp)
const chunk = await loadChunk(latestJsonPathname, blobStore)
const snapshot = chunk.getSnapshot()
for (const change of chunk.getChanges()) {
change.applyTo(snapshot)
}
if (VERBOSE) console.log('zipping', historyId)
const zipPathname = path.join(tmp, `${historyId}.zip`)
const zipTimeoutMs = 60 * 1000
const archive = new ProjectArchive(snapshot, zipTimeoutMs)
await archive.writeZip(blobStore, zipPathname)
if (VERBOSE) console.log('uploading', historyId)
return await uploadZip(historyId, zipPathname)
}
async function main() {
for (const historyId of HISTORY_IDS) {
const signedUrl = await restoreProject(historyId)
console.log(signedUrl)
}
}
main().catch(console.error)

View File

@@ -0,0 +1,36 @@
import redis from '@overleaf/redis-wrapper'
import config from 'config'
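// Health-check helper for the Redis instances configured for this service.
// Usage: node redis.mjs <db>
// where <db> is one of the keys under the "redis" section of the config
// (the exact names depend on your config files).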
// Get allowed Redis dbs from config
const redisConfig = config.get('redis')
const allowedDbs = Object.keys(redisConfig)
// Get the Redis db name from the command line argument
const db = process.argv[2]
// Validate redis db
if (!allowedDbs.includes(db)) {
if (db) {
console.error('Invalid redis db:', db)
}
console.error(`Usage: node redis.mjs [${allowedDbs.join('|')}]`)
process.exit(1)
}
// Get redis options based on command line argument
const redisOptions = config.get(`redis.${db}`)
console.log('Using redis db:', db)
console.log('REDIS CONFIG', {
...redisOptions,
password: '*'.repeat(redisOptions.password?.length),
})
const rclient = redis.createClient(redisOptions)
try {
await rclient.healthCheck()
console.log('REDIS HEALTHCHECK SUCCEEDED')
} catch (error) {
console.error('REDIS HEALTHCHECK FAILED', error)
} finally {
await rclient.quit()
}

View File

@@ -0,0 +1,104 @@
// @ts-check
import { readFileSync } from 'node:fs'
import commandLineArgs from 'command-line-args'
import { client } from '../lib/mongodb.js'
import {
getBackedUpBlobHashes,
unsetBackedUpBlobHashes,
} from '../lib/backup_store/index.js'
let gracefulShutdownInitiated = false
// Parse command line arguments
const args = commandLineArgs([
{ name: 'input', type: String, alias: 'i', defaultOption: true },
{ name: 'commit', type: Boolean, defaultValue: false },
])
if (!args.input) {
console.error(
'Usage: node remove_backed_up_blobs.mjs --input <csv-file> [--commit]'
)
process.exit(1)
}
if (!args.commit) {
console.log('Running in dry-run mode. Use --commit to apply changes.')
}
// Signal handling
process.on('SIGINT', handleSignal)
process.on('SIGTERM', handleSignal)
function handleSignal() {
console.warn('Graceful shutdown initiated')
gracefulShutdownInitiated = true
}
// Process CSV and remove blobs
async function main() {
const projectBlobs = new Map()
const lines = readFileSync(args.input, 'utf8').split('\n')
const SHA1_HEX_REGEX = /^[a-f0-9]{40}$/
// Skip header
for (const line of lines.slice(1)) {
if (!line.trim() || gracefulShutdownInitiated) break
const [projectId, path] = line.split(',')
const pathParts = path.split('/')
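// Backed-up blob keys are assumed to end with /<first 2 hash chars>/<remaining
// 38 chars>, so the full SHA-1 is reassembled from the 4th and 5th segments.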
const hash = pathParts[3] + pathParts[4]
if (!SHA1_HEX_REGEX.test(hash)) {
console.warn(`Invalid SHA1 hash for project ${projectId}: ${hash}`)
continue
}
if (!projectBlobs.has(projectId)) {
projectBlobs.set(projectId, new Set())
}
projectBlobs.get(projectId).add(hash)
}
// Process each project
for (const [projectId, hashes] of projectBlobs) {
if (gracefulShutdownInitiated) break
if (!args.commit) {
console.log(
`DRY-RUN: would remove ${hashes.size} blobs from project ${projectId}`
)
continue
}
try {
const originalHashes = await getBackedUpBlobHashes(projectId)
if (originalHashes.size === 0) {
continue
}
const result = await unsetBackedUpBlobHashes(
projectId,
Array.from(hashes)
)
if (result) {
console.log(
`Project ${projectId}: want to remove ${hashes.size}, removed ${originalHashes.size - result.blobs.length}, ${result.blobs.length} remaining`
)
}
} catch (err) {
console.error(`Error updating project ${projectId}:`, err)
}
}
}
// Run the script
main()
.catch(err => {
console.error('Fatal error:', err)
process.exitCode = 1
})
.finally(() => {
client
.close()
.catch(err => console.error('Error closing MongoDB connection:', err))
})

View File

@@ -0,0 +1,221 @@
// @ts-check
/**
* This script is used to remove blobs that have been backed up under the project ID
* instead of the history ID (where those are different).
*
* This script reads a CSV file with the following format:
* ```
* project_id,hash
* <mongo ID>,<hash>
* ```
*
* The header row is optional. All rows will be checked for conformance to the format.
*/
import commandLineArgs from 'command-line-args'
import { backupPersistor, projectBlobsBucket } from '../lib/backupPersistor.mjs'
import { makeProjectKey } from '../lib/blob_store/index.js'
import fs from 'node:fs'
import assert from '../lib/assert.js'
import { client } from '../lib/mongodb.js'
import { verifyBlobs } from '../lib/backupVerifier.mjs'
import { setTimeout } from 'node:timers/promises'
import { getHistoryId } from '../lib/backup_store/index.js'
const argsSchema = [
{
name: 'input',
type: String,
},
{
name: 'commit',
type: Boolean,
},
{
name: 'header',
type: Boolean,
},
{
name: 'force',
type: Boolean,
},
{
name: 'verbose',
type: Boolean,
},
]
const args = commandLineArgs(argsSchema)
async function gracefulClose(code = 0) {
await client.close()
process.exit(code)
}
/**
*
* @param {(value: unknown) => void} fn
* @param {unknown} value
* @return {boolean}
*/
function not(fn, value) {
try {
fn(value)
return false
} catch {
return true
}
}
/**
*
* @param {string} row
* @return {{projectId: string, hash: string}}
*/
function parseCSVRow(row) {
const [projectId, hash] = row.split(',')
assert.mongoId(projectId, `invalid projectId ${projectId}`)
assert.blobHash(hash, `invalid hash ${hash}`)
return { projectId, hash }
}
/**
*
* @param {string} path
* @param {boolean} hasHeader
* @return {AsyncGenerator<{projectId: string, hash: string}, void, *>}
*/
async function* readCSV(path, hasHeader) {
let seenHeader = !hasHeader
let fh
try {
fh = await fs.promises.open(path, 'r')
} catch (error) {
console.error(`Could not open file: ${error}`)
return await gracefulClose(1)
}
for await (const line of fh.readLines()) {
if (!seenHeader) {
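// --header was passed: the first line must not itself look like data (a mongo
// ID plus a blob hash); abort if it does rather than silently skip a real row.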
const [first, second] = line.split(',')
const noDataInHeader =
not(assert.mongoId, first) && not(assert.blobHash, second)
if (!noDataInHeader) {
console.error('Data found in header row')
return await gracefulClose(1)
}
seenHeader = true
continue
}
try {
yield parseCSVRow(line)
} catch (error) {
console.error(error instanceof Error ? error.message : error)
console.info(`Skipping invalid row: ${line}`)
}
}
}
function usage() {
console.info(
'Usage: remove_blobs_from_backup.mjs --input <path> [--commit] [--header] [--force] [--verbose]'
)
}
if (!args.input) {
console.error('--input was missing')
usage()
await gracefulClose(1)
}
/**
*
* @param {string} projectId
* @param {string} hash
* @return {Promise<void>}
*/
async function deleteBlob(projectId, hash) {
const path = makeProjectKey(projectId, hash)
if (args.commit) {
await backupPersistor.deleteObject(projectBlobsBucket, path)
} else {
console.log(`DELETE: ${path}`)
}
}
/**
*
* @param {string} projectId
* @param {string} hash
* @return {Promise<void>}
*/
async function canDeleteBlob(projectId, hash) {
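// A blob is only considered safe to delete from the project-ID location when
// the project has a distinct (postgres) history ID and the blob verifies as
// backed up under that history ID; --force bypasses these checks.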
let historyId
try {
historyId = await getHistoryId(projectId)
} catch (error) {
if (args.verbose) {
console.error(error)
}
throw new Error(`No history ID found for project ${projectId}, skipping`)
}
if (historyId === projectId) {
throw new Error(
`Project ID and history ID are the same for ${projectId} - use --force to delete anyway`
)
}
// TODO: fix assert.postgresId to handle integers better and then stop coercing to string below
assert.postgresId(
`${historyId}`,
`History ID ${historyId} does not appear to be for a postgres project`
)
try {
await verifyBlobs(`${historyId}`, [hash])
} catch (error) {
if (args.verbose) {
console.error(error)
}
throw new Error(
`Blob ${hash} is not backed up for project ${projectId} - use --force to delete anyway`
)
}
}
if (!args.commit) {
console.log('DRY RUN: provide --commit to perform operations')
}
if (args.force) {
console.log(
'WARNING: --force is enabled, blobs will be deleted regardless of backup status'
)
await setTimeout(5_000)
}
let deleted = 0
let errors = 0
for await (const { projectId, hash } of readCSV(args.input, args.header)) {
if (!args.force) {
try {
await canDeleteBlob(projectId, hash)
} catch (error) {
console.error(error instanceof Error ? error.message : error)
continue
}
}
try {
await deleteBlob(projectId, hash)
deleted++
} catch (error) {
errors++
console.error(error)
}
}
console.log(`Deleted: ${deleted}`)
console.log(`Errors: ${errors}`)
await gracefulClose()

Some files were not shown because too many files have changed in this diff.