first commit
services/history-v1/storage/lib/chunk_store/errors.js (new file, 7 lines)
@@ -0,0 +1,7 @@
const OError = require('@overleaf/o-error')

class ChunkVersionConflictError extends OError {}

module.exports = {
  ChunkVersionConflictError,
}
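// Illustrative usage (a sketch, not part of this commit): OError subclasses
// carry an info object alongside the message, so callers can attach context
// when throwing and recover it when handling.
// eslint-disable-next-line no-unused-vars
function exampleHandleConflict(projectId, chunkId) {
  try {
    throw new ChunkVersionConflictError('chunk is closed', { projectId, chunkId })
  } catch (err) {
    if (err instanceof ChunkVersionConflictError) {
      return OError.getFullInfo(err) // { projectId, chunkId }
    }
    throw err
  }
}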
services/history-v1/storage/lib/chunk_store/index.js (new file, 447 lines)
@@ -0,0 +1,447 @@
// @ts-check

'use strict'

/**
 * Manage {@link Chunk} and {@link History} storage.
 *
 * For storage, chunks are immutable. If we want to update a project with new
 * changes, we create a new chunk record and History object and delete the old
 * ones. If we compact a project's history, we similarly destroy the old chunk
 * (or chunks) and replace them with a new one. This is helpful when using S3,
 * because it guarantees only eventual consistency for updates but provides
 * stronger consistency guarantees for object creation.
 *
 * When a chunk record in the database is removed, we save its ID for later
 * in the `old_chunks` table, rather than deleting it immediately. This lets us
 * use batch deletion to reduce the number of delete requests to S3.
 *
 * The chunk store also caches data about which blobs are referenced by each
 * chunk, which allows us to find unused blobs without loading all of the data
 * for all projects from S3. Whenever we create a chunk, we also insert records
 * into the `chunk_blobs` table, to help with this bookkeeping.
 */

const config = require('config')
const OError = require('@overleaf/o-error')
const { Chunk, History, Snapshot } = require('overleaf-editor-core')

const assert = require('../assert')
const BatchBlobStore = require('../batch_blob_store')
const { BlobStore } = require('../blob_store')
const { historyStore } = require('../history_store')
const mongoBackend = require('./mongo')
const postgresBackend = require('./postgres')
const { ChunkVersionConflictError } = require('./errors')

const DEFAULT_DELETE_BATCH_SIZE = parseInt(config.get('maxDeleteKeys'), 10)
const DEFAULT_DELETE_TIMEOUT_SECS = 3000 // 50 minutes
const DEFAULT_DELETE_MIN_AGE_SECS = 86400 // 1 day

/**
 * Create the initial chunk for a project.
 */
async function initializeProject(projectId, snapshot) {
  if (projectId != null) {
    assert.projectId(projectId, 'bad projectId')
  } else {
    projectId = await postgresBackend.generateProjectId()
  }

  if (snapshot != null) {
    assert.instance(snapshot, Snapshot, 'bad snapshot')
  } else {
    snapshot = new Snapshot()
  }

  const blobStore = new BlobStore(projectId)
  await blobStore.initialize()

  const backend = getBackend(projectId)
  const chunkRecord = await backend.getLatestChunk(projectId)
  if (chunkRecord != null) {
    throw new AlreadyInitialized(projectId)
  }

  const history = new History(snapshot, [])
  const chunk = new Chunk(history, 0)
  await create(projectId, chunk)
  return projectId
}

/**
 * Load the blobs referenced in the given history
 */
async function lazyLoadHistoryFiles(history, batchBlobStore) {
  const blobHashes = new Set()
  history.findBlobHashes(blobHashes)

  await batchBlobStore.preload(Array.from(blobHashes))
  await history.loadFiles('lazy', batchBlobStore)
}

/**
 * Load the latest Chunk stored for a project, including blob metadata.
 *
 * @param {string} projectId
 * @param {Object} [opts]
 * @param {boolean} [opts.readOnly]
 * @return {Promise<{id: string, startVersion: number, endVersion: number, endTimestamp: Date}>}
 */
async function loadLatestRaw(projectId, opts) {
  assert.projectId(projectId, 'bad projectId')

  const backend = getBackend(projectId)
  const chunkRecord = await backend.getLatestChunk(projectId, opts)
  if (chunkRecord == null) {
    throw new Chunk.NotFoundError(projectId)
  }
  return chunkRecord
}

/**
 * Load the latest Chunk stored for a project, including blob metadata.
 *
 * @param {string} projectId
 * @return {Promise.<Chunk>}
 */
async function loadLatest(projectId) {
  const chunkRecord = await loadLatestRaw(projectId)
  const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
  const history = History.fromRaw(rawHistory)
  const blobStore = new BlobStore(projectId)
  const batchBlobStore = new BatchBlobStore(blobStore)
  await lazyLoadHistoryFiles(history, batchBlobStore)
  return new Chunk(history, chunkRecord.startVersion)
}

/**
 * Load the chunk that contains the given version, including blob metadata.
 */
async function loadAtVersion(projectId, version) {
  assert.projectId(projectId, 'bad projectId')
  assert.integer(version, 'bad version')

  const backend = getBackend(projectId)
  const blobStore = new BlobStore(projectId)
  const batchBlobStore = new BatchBlobStore(blobStore)

  const chunkRecord = await backend.getChunkForVersion(projectId, version)
  const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
  const history = History.fromRaw(rawHistory)
  await lazyLoadHistoryFiles(history, batchBlobStore)
  return new Chunk(history, chunkRecord.endVersion - history.countChanges())
}

/**
 * Load the chunk that contains the version that was current at the given
 * timestamp, including blob metadata.
 */
async function loadAtTimestamp(projectId, timestamp) {
  assert.projectId(projectId, 'bad projectId')
  assert.date(timestamp, 'bad timestamp')

  const backend = getBackend(projectId)
  const blobStore = new BlobStore(projectId)
  const batchBlobStore = new BatchBlobStore(blobStore)

  const chunkRecord = await backend.getChunkForTimestamp(projectId, timestamp)
  const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
  const history = History.fromRaw(rawHistory)
  await lazyLoadHistoryFiles(history, batchBlobStore)
  return new Chunk(history, chunkRecord.endVersion - history.countChanges())
}

/**
 * Store the chunk and insert corresponding records in the database.
 *
 * @param {string} projectId
 * @param {Chunk} chunk
 * @param {Date} [earliestChangeTimestamp]
 */
async function create(projectId, chunk, earliestChangeTimestamp) {
  assert.projectId(projectId, 'bad projectId')
  assert.instance(chunk, Chunk, 'bad chunk')
  assert.maybe.date(earliestChangeTimestamp, 'bad timestamp')

  const backend = getBackend(projectId)
  const chunkStart = chunk.getStartVersion()
  const chunkId = await uploadChunk(projectId, chunk)

  const opts = {}
  if (chunkStart > 0) {
    opts.oldChunkId = await getChunkIdForVersion(projectId, chunkStart - 1)
  }
  if (earliestChangeTimestamp != null) {
    opts.earliestChangeTimestamp = earliestChangeTimestamp
  }

  await backend.confirmCreate(projectId, chunk, chunkId, opts)
}

/**
 * Upload the given chunk to object storage.
 *
 * This is used by the create and update methods.
 */
async function uploadChunk(projectId, chunk) {
  const backend = getBackend(projectId)
  const blobStore = new BlobStore(projectId)

  const historyStoreConcurrency = parseInt(
    config.get('chunkStore.historyStoreConcurrency'),
    10
  )

  const rawHistory = await chunk
    .getHistory()
    .store(blobStore, historyStoreConcurrency)
  const chunkId = await backend.insertPendingChunk(projectId, chunk)
  await historyStore.storeRaw(projectId, chunkId, rawHistory)
  return chunkId
}

/**
 * Extend the project's history by replacing the latest chunk with a new
 * chunk.
 *
 * @param {string} projectId
 * @param {number} oldEndVersion
 * @param {Chunk} newChunk
 * @param {Date} [earliestChangeTimestamp]
 * @return {Promise}
 */
async function update(
  projectId,
  oldEndVersion,
  newChunk,
  earliestChangeTimestamp
) {
  assert.projectId(projectId, 'bad projectId')
  assert.integer(oldEndVersion, 'bad oldEndVersion')
  assert.instance(newChunk, Chunk, 'bad newChunk')
  assert.maybe.date(earliestChangeTimestamp, 'bad timestamp')

  const backend = getBackend(projectId)
  const oldChunkId = await getChunkIdForVersion(projectId, oldEndVersion)
  const newChunkId = await uploadChunk(projectId, newChunk)

  const opts = {}
  if (earliestChangeTimestamp != null) {
    opts.earliestChangeTimestamp = earliestChangeTimestamp
  }

  await backend.confirmUpdate(projectId, oldChunkId, newChunk, newChunkId, opts)
}

/**
 * Find the chunk ID for a given version of a project.
 *
 * @param {string} projectId
 * @param {number} version
 * @return {Promise.<string>}
 */
async function getChunkIdForVersion(projectId, version) {
  const backend = getBackend(projectId)
  const chunkRecord = await backend.getChunkForVersion(projectId, version)
  return chunkRecord.id
}

/**
 * Find the chunk metadata for a given version of a project.
 *
 * @param {string} projectId
 * @param {number} version
 * @return {Promise.<{id: string|number, startVersion: number, endVersion: number}>}
 */
async function getChunkMetadataForVersion(projectId, version) {
  const backend = getBackend(projectId)
  const chunkRecord = await backend.getChunkForVersion(projectId, version)
  return chunkRecord
}

/**
 * Get all of a project's chunk ids
 */
async function getProjectChunkIds(projectId) {
  const backend = getBackend(projectId)
  const chunkIds = await backend.getProjectChunkIds(projectId)
  return chunkIds
}

/**
 * Get all of a project's chunks directly
 */
async function getProjectChunks(projectId) {
  const backend = getBackend(projectId)
  const chunkIds = await backend.getProjectChunks(projectId)
  return chunkIds
}

/**
 * Load the chunk for a given chunk record, including blob metadata.
 */
async function loadByChunkRecord(projectId, chunkRecord) {
  const blobStore = new BlobStore(projectId)
  const batchBlobStore = new BatchBlobStore(blobStore)
  const { raw: rawHistory, buffer: chunkBuffer } =
    await historyStore.loadRawWithBuffer(projectId, chunkRecord.id)
  const history = History.fromRaw(rawHistory)
  await lazyLoadHistoryFiles(history, batchBlobStore)
  return {
    chunk: new Chunk(history, chunkRecord.endVersion - history.countChanges()),
    chunkBuffer,
  }
}

/**
 * Asynchronously retrieves project chunks starting from a specific version.
 *
 * This generator function yields chunk records for a given project starting from the specified version (inclusive).
 * It continues to fetch and yield subsequent chunk records until the end version of the latest chunk metadata is reached.
 * If you want to fetch all the chunks *after* a version V, call this function with V+1.
 *
 * @param {string} projectId - The ID of the project.
 * @param {number} version - The starting version to retrieve chunks from.
 * @returns {AsyncGenerator<Object, void, undefined>} An async generator that yields chunk records.
 */
async function* getProjectChunksFromVersion(projectId, version) {
  const backend = getBackend(projectId)
  const latestChunkMetadata = await loadLatestRaw(projectId)
  if (!latestChunkMetadata || version > latestChunkMetadata.endVersion) {
    return
  }
  let chunkRecord = await backend.getChunkForVersion(projectId, version)
  while (chunkRecord != null) {
    yield chunkRecord
    if (chunkRecord.endVersion >= latestChunkMetadata.endVersion) {
      break
    } else {
      chunkRecord = await backend.getChunkForVersion(
        projectId,
        chunkRecord.endVersion + 1
      )
    }
  }
}
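
// Illustrative usage (a sketch, not part of the module API): walk every chunk
// record in a project's history from version 0. Assumes a configured
// history-v1 environment; the function is defined here only as an example and
// is not called anywhere.
// eslint-disable-next-line no-unused-vars
async function exampleListChunkRecords(projectId) {
  const records = []
  for await (const record of getProjectChunksFromVersion(projectId, 0)) {
    // each record is chunk metadata: { id, startVersion, endVersion, ... }
    records.push(record)
  }
  return records
}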

/**
 * Delete the given chunk from the database.
 *
 * This doesn't delete the chunk from object storage yet. The old chunks
 * collection will do that.
 */
async function destroy(projectId, chunkId) {
  const backend = getBackend(projectId)
  await backend.deleteChunk(projectId, chunkId)
}

/**
 * Delete all of a project's chunks from the database.
 */
async function deleteProjectChunks(projectId) {
  const backend = getBackend(projectId)
  await backend.deleteProjectChunks(projectId)
}

/**
 * Delete a given number of old chunks from both the database
 * and from object storage.
 *
 * @param {object} options
 * @param {number} [options.batchSize] - number of chunks to delete in each
 *        batch
 * @param {number} [options.maxBatches] - maximum number of batches to process
 * @param {number} [options.minAgeSecs] - minimum age of chunks to delete
 * @param {number} [options.timeout] - maximum time to spend deleting chunks
 *
 * @return {Promise<number>} number of chunks deleted
 */
async function deleteOldChunks(options = {}) {
  const batchSize = options.batchSize ?? DEFAULT_DELETE_BATCH_SIZE
  const maxBatches = options.maxBatches ?? Number.MAX_SAFE_INTEGER
  const minAgeSecs = options.minAgeSecs ?? DEFAULT_DELETE_MIN_AGE_SECS
  const timeout = options.timeout ?? DEFAULT_DELETE_TIMEOUT_SECS
  assert.greater(batchSize, 0)
  assert.greater(timeout, 0)
  assert.greater(maxBatches, 0)
  assert.greaterOrEqual(minAgeSecs, 0)

  const timeoutAfter = Date.now() + timeout * 1000
  let deletedChunksTotal = 0
  for (const backend of [postgresBackend, mongoBackend]) {
    for (let i = 0; i < maxBatches; i++) {
      if (Date.now() > timeoutAfter) {
        break
      }
      const deletedChunks = await deleteOldChunksBatch(
        backend,
        batchSize,
        minAgeSecs
      )
      deletedChunksTotal += deletedChunks.length
      if (deletedChunks.length !== batchSize) {
        // Last batch was incomplete. There probably are no old chunks left.
        break
      }
    }
  }
  return deletedChunksTotal
}

async function deleteOldChunksBatch(backend, count, minAgeSecs) {
  assert.greater(count, 0, 'bad count')
  assert.greaterOrEqual(minAgeSecs, 0, 'bad minAgeSecs')

  const oldChunks = await backend.getOldChunksBatch(count, minAgeSecs)
  if (oldChunks.length === 0) {
    return []
  }
  await historyStore.deleteChunks(oldChunks)
  await backend.deleteOldChunks(oldChunks.map(chunk => chunk.chunkId))
  return oldChunks
}

/**
 * Returns the appropriate backend for the given project id
 *
 * Numeric ids use the Postgres backend.
 * Strings of 24 characters use the Mongo backend.
 */
function getBackend(projectId) {
  if (assert.POSTGRES_ID_REGEXP.test(projectId)) {
    return postgresBackend
  } else if (assert.MONGO_ID_REGEXP.test(projectId)) {
    return mongoBackend
  } else {
    throw new OError('bad project id', { projectId })
  }
}

class AlreadyInitialized extends OError {
  constructor(projectId) {
    super('Project is already initialized', { projectId })
  }
}

module.exports = {
  getBackend,
  initializeProject,
  loadLatest,
  loadLatestRaw,
  loadAtVersion,
  loadAtTimestamp,
  loadByChunkRecord,
  create,
  update,
  destroy,
  getChunkIdForVersion,
  getChunkMetadataForVersion,
  getProjectChunkIds,
  getProjectChunks,
  getProjectChunksFromVersion,
  deleteProjectChunks,
  deleteOldChunks,
  AlreadyInitialized,
  ChunkVersionConflictError,
}
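// Illustrative lifecycle sketch (not part of this commit's API): initialize a
// project, read back its latest chunk, and extend the history with a new
// chunk. `buildExtendedChunk` is a hypothetical helper standing in for
// whatever constructs the next Chunk from the latest one.
// eslint-disable-next-line no-unused-vars
async function exampleLifecycle(buildExtendedChunk) {
  const projectId = await initializeProject() // no id given: a Postgres id is generated
  const latest = await loadLatest(projectId) // Chunk with snapshot and changes
  const newChunk = await buildExtendedChunk(latest) // hypothetical
  await update(projectId, latest.getEndVersion(), newChunk, new Date())
  return projectId
}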
services/history-v1/storage/lib/chunk_store/mongo.js (new file, 526 lines)
@@ -0,0 +1,526 @@
// @ts-check

const { ObjectId, ReadPreference, MongoError } = require('mongodb')
const { Chunk } = require('overleaf-editor-core')
const OError = require('@overleaf/o-error')
const assert = require('../assert')
const mongodb = require('../mongodb')
const { ChunkVersionConflictError } = require('./errors')

const DUPLICATE_KEY_ERROR_CODE = 11000

/**
 * @import { ClientSession } from 'mongodb'
 */

/**
 * Get the latest chunk's metadata from the database
 * @param {string} projectId
 * @param {Object} [opts]
 * @param {boolean} [opts.readOnly]
 */
async function getLatestChunk(projectId, opts = {}) {
  assert.mongoId(projectId, 'bad projectId')
  const { readOnly = false } = opts

  const record = await mongodb.chunks.findOne(
    {
      projectId: new ObjectId(projectId),
      state: { $in: ['active', 'closed'] },
    },
    {
      sort: { startVersion: -1 },
      readPreference: readOnly
        ? ReadPreference.secondaryPreferred
        : ReadPreference.primary,
    }
  )
  if (record == null) {
    return null
  }
  return chunkFromRecord(record)
}

/**
 * Get the metadata for the chunk that contains the given version.
 */
async function getChunkForVersion(projectId, version) {
  assert.mongoId(projectId, 'bad projectId')
  assert.integer(version, 'bad version')

  const record = await mongodb.chunks.findOne(
    {
      projectId: new ObjectId(projectId),
      state: { $in: ['active', 'closed'] },
      startVersion: { $lte: version },
      endVersion: { $gte: version },
    },
    { sort: { startVersion: 1 } }
  )
  if (record == null) {
    throw new Chunk.VersionNotFoundError(projectId, version)
  }
  return chunkFromRecord(record)
}

/**
 * Get the metadata for the first chunk of the project (the one that starts at
 * version 0), provided that it ended before the given timestamp.
 */
async function getFirstChunkBeforeTimestamp(projectId, timestamp) {
  assert.mongoId(projectId, 'bad projectId')
  assert.date(timestamp, 'bad timestamp')

  const recordActive = await getChunkForVersion(projectId, 0)
  if (recordActive && recordActive.endTimestamp <= timestamp) {
    return recordActive
  }

  // fallback to deleted chunk
  const recordDeleted = await mongodb.chunks.findOne(
    {
      projectId: new ObjectId(projectId),
      state: 'deleted',
      startVersion: 0,
      updatedAt: { $lte: timestamp }, // indexed for state=deleted
      endTimestamp: { $lte: timestamp },
    },
    { sort: { updatedAt: -1 } }
  )
  if (recordDeleted) {
    return chunkFromRecord(recordDeleted)
  }
  throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
}

/**
 * Get the metadata for the chunk that contains the version that was current at
 * the given timestamp.
 */
async function getChunkForTimestamp(projectId, timestamp) {
  assert.mongoId(projectId, 'bad projectId')
  assert.date(timestamp, 'bad timestamp')

  const record = await mongodb.chunks.findOne(
    {
      projectId: new ObjectId(projectId),
      state: { $in: ['active', 'closed'] },
      endTimestamp: { $gte: timestamp },
    },
    // We use the index on the startVersion for sorting records. This assumes
    // that timestamps go up with each version.
    { sort: { startVersion: 1 } }
  )

  if (record == null) {
    // Couldn't find a chunk that had modifications after the given timestamp.
    // Fetch the latest chunk instead.
    const chunk = await getLatestChunk(projectId)
    if (chunk == null) {
      throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
    }
    return chunk
  }

  return chunkFromRecord(record)
}

/**
 * Get the metadata for the chunk that contains the version that was current
 * before the given timestamp.
 */
async function getLastActiveChunkBeforeTimestamp(projectId, timestamp) {
  assert.mongoId(projectId, 'bad projectId')
  assert.date(timestamp, 'bad timestamp')

  const record = await mongodb.chunks.findOne(
    {
      projectId: new ObjectId(projectId),
      state: { $in: ['active', 'closed'] },
      $or: [
        {
          endTimestamp: {
            $lte: timestamp,
          },
        },
        {
          endTimestamp: null,
        },
      ],
    },
    // We use the index on the startVersion for sorting records. This assumes
    // that timestamps go up with each version.
    { sort: { startVersion: -1 } }
  )
  if (record == null) {
    throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
  }
  return chunkFromRecord(record)
}

/**
 * Get all of a project's chunk ids
 */
async function getProjectChunkIds(projectId) {
  assert.mongoId(projectId, 'bad projectId')

  const cursor = mongodb.chunks.find(
    {
      projectId: new ObjectId(projectId),
      state: { $in: ['active', 'closed'] },
    },
    { projection: { _id: 1 } }
  )
  return await cursor.map(record => record._id).toArray()
}

/**
 * Get all of a project's chunks directly
 */
async function getProjectChunks(projectId) {
  assert.mongoId(projectId, 'bad projectId')

  const cursor = mongodb.chunks
    .find(
      {
        projectId: new ObjectId(projectId),
        state: { $in: ['active', 'closed'] },
      },
      { projection: { state: 0 } }
    )
    .sort({ startVersion: 1 })
  return await cursor.map(chunkFromRecord).toArray()
}

/**
 * Insert a pending chunk before sending it to object storage.
 */
async function insertPendingChunk(projectId, chunk) {
  assert.mongoId(projectId, 'bad projectId')
  assert.instance(chunk, Chunk, 'bad chunk')

  const chunkId = new ObjectId()
  await mongodb.chunks.insertOne({
    _id: chunkId,
    projectId: new ObjectId(projectId),
    startVersion: chunk.getStartVersion(),
    endVersion: chunk.getEndVersion(),
    endTimestamp: chunk.getEndTimestamp(),
    state: 'pending',
    updatedAt: new Date(),
  })
  return chunkId.toString()
}

/**
 * Record that a new chunk was created.
 *
 * @param {string} projectId
 * @param {Chunk} chunk
 * @param {string} chunkId
 * @param {object} opts
 * @param {Date} [opts.earliestChangeTimestamp]
 * @param {string} [opts.oldChunkId]
 */
async function confirmCreate(projectId, chunk, chunkId, opts = {}) {
  assert.mongoId(projectId, 'bad projectId')
  assert.instance(chunk, Chunk, 'bad newChunk')
  assert.mongoId(chunkId, 'bad newChunkId')

  await mongodb.client.withSession(async session => {
    await session.withTransaction(async () => {
      if (opts.oldChunkId != null) {
        await closeChunk(projectId, opts.oldChunkId, { session })
      }

      await activateChunk(projectId, chunkId, { session })

      await updateProjectRecord(
        projectId,
        chunk,
        opts.earliestChangeTimestamp,
        { session }
      )
    })
  })
}

/**
 * Write the metadata to the project record
 */
async function updateProjectRecord(
  projectId,
  chunk,
  earliestChangeTimestamp,
  mongoOpts = {}
) {
  // record the end version against the project
  await mongodb.projects.updateOne(
    {
      'overleaf.history.id': projectId, // string for Mongo ObjectIds, number for Postgres ids
    },
    {
      // always store the latest end version and timestamp for the chunk
      $max: {
        'overleaf.history.currentEndVersion': chunk.getEndVersion(),
        'overleaf.history.currentEndTimestamp': chunk.getEndTimestamp(),
        'overleaf.history.updatedAt': new Date(),
      },
      // store the first pending change timestamp for the chunk; this will
      // be cleared every time a backup is completed.
      $min: {
        'overleaf.backup.pendingChangeAt':
          earliestChangeTimestamp || chunk.getEndTimestamp() || new Date(),
      },
    },
    mongoOpts
  )
}

/**
 * Record that a chunk was replaced by a new one.
 *
 * @param {string} projectId
 * @param {string} oldChunkId
 * @param {Chunk} newChunk
 * @param {string} newChunkId
 * @param {object} [opts]
 * @param {Date} [opts.earliestChangeTimestamp]
 */
async function confirmUpdate(
  projectId,
  oldChunkId,
  newChunk,
  newChunkId,
  opts = {}
) {
  assert.mongoId(projectId, 'bad projectId')
  assert.mongoId(oldChunkId, 'bad oldChunkId')
  assert.instance(newChunk, Chunk, 'bad newChunk')
  assert.mongoId(newChunkId, 'bad newChunkId')

  await mongodb.client.withSession(async session => {
    await session.withTransaction(async () => {
      await deleteActiveChunk(projectId, oldChunkId, { session })

      await activateChunk(projectId, newChunkId, { session })

      await updateProjectRecord(
        projectId,
        newChunk,
        opts.earliestChangeTimestamp,
        { session }
      )
    })
  })
}
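
// Summary added for clarity (derived from the functions above and below): a
// chunk record in Mongo moves through these states:
//
//   pending -> active    activateChunk, once the upload to object storage has
//                        succeeded (confirmCreate / confirmUpdate)
//   active  -> closed    closeChunk, when confirmCreate supersedes the old
//                        chunk with a new one
//   active  -> deleted   deleteActiveChunk (confirmUpdate), deleteChunk, or
//                        deleteProjectChunks
//
// Readers only ever query for 'active' and 'closed' chunks; 'deleted' and
// stale 'pending' records are collected later by getOldChunksBatch and
// removed by deleteOldChunks.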

/**
 * Activate a pending chunk
 *
 * @param {string} projectId
 * @param {string} chunkId
 * @param {object} [opts]
 * @param {ClientSession} [opts.session]
 */
async function activateChunk(projectId, chunkId, opts = {}) {
  assert.mongoId(projectId, 'bad projectId')
  assert.mongoId(chunkId, 'bad chunkId')

  let result
  try {
    result = await mongodb.chunks.updateOne(
      {
        _id: new ObjectId(chunkId),
        projectId: new ObjectId(projectId),
        state: 'pending',
      },
      { $set: { state: 'active', updatedAt: new Date() } },
      opts
    )
  } catch (err) {
    if (err instanceof MongoError && err.code === DUPLICATE_KEY_ERROR_CODE) {
      throw new ChunkVersionConflictError('chunk start version is not unique', {
        projectId,
        chunkId,
      })
    } else {
      throw err
    }
  }
  if (result.matchedCount === 0) {
    throw new OError('pending chunk not found', { projectId, chunkId })
  }
}

/**
 * Close a chunk
 *
 * A closed chunk is one that can't be extended anymore.
 *
 * @param {string} projectId
 * @param {string} chunkId
 * @param {object} [opts]
 * @param {ClientSession} [opts.session]
 */
async function closeChunk(projectId, chunkId, opts = {}) {
  const result = await mongodb.chunks.updateOne(
    {
      _id: new ObjectId(chunkId),
      projectId: new ObjectId(projectId),
      state: 'active',
    },
    { $set: { state: 'closed' } },
    opts
  )

  if (result.matchedCount === 0) {
    throw new ChunkVersionConflictError('unable to close chunk', {
      projectId,
      chunkId,
    })
  }
}

/**
 * Delete an active chunk
 *
 * This is used to delete chunks that are in the process of being extended. It
 * will refuse to delete chunks that are already closed and can therefore not
 * be extended.
 *
 * @param {string} projectId
 * @param {string} chunkId
 * @param {object} [opts]
 * @param {ClientSession} [opts.session]
 */
async function deleteActiveChunk(projectId, chunkId, opts = {}) {
  const updateResult = await mongodb.chunks.updateOne(
    {
      _id: new ObjectId(chunkId),
      projectId: new ObjectId(projectId),
      state: 'active',
    },
    { $set: { state: 'deleted', updatedAt: new Date() } },
    opts
  )

  if (updateResult.matchedCount === 0) {
    throw new ChunkVersionConflictError('unable to delete active chunk', {
      projectId,
      chunkId,
    })
  }
}

/**
 * Delete a chunk.
 *
 * @param {string} projectId
 * @param {string} chunkId
 * @return {Promise}
 */
async function deleteChunk(projectId, chunkId, mongoOpts = {}) {
  assert.mongoId(projectId, 'bad projectId')
  assert.mongoId(chunkId, 'bad chunkId')

  await mongodb.chunks.updateOne(
    { _id: new ObjectId(chunkId), projectId: new ObjectId(projectId) },
    { $set: { state: 'deleted', updatedAt: new Date() } },
    mongoOpts
  )
}

/**
 * Delete all of a project's chunks
 */
async function deleteProjectChunks(projectId) {
  assert.mongoId(projectId, 'bad projectId')

  await mongodb.chunks.updateMany(
    {
      projectId: new ObjectId(projectId),
      state: { $in: ['active', 'closed'] },
    },
    { $set: { state: 'deleted', updatedAt: new Date() } }
  )
}

/**
 * Get a batch of old chunks for deletion
 */
async function getOldChunksBatch(count, minAgeSecs) {
  const maxUpdatedAt = new Date(Date.now() - minAgeSecs * 1000)
  const batch = []

  // We need to fetch one state at a time to take advantage of the partial
  // indexes on the chunks collection.
  //
  // Mongo 6.0 allows partial indexes that use the $in operator. When we reach
  // that Mongo version, we can create a partial index on both the deleted and
  // pending states and simplify this logic a bit.
  for (const state of ['deleted', 'pending']) {
    if (count === 0) {
      // There's no more space in the batch
      break
    }

    const cursor = mongodb.chunks
      .find(
        { state, updatedAt: { $lt: maxUpdatedAt } },
        {
          limit: count,
          projection: { _id: 1, projectId: 1 },
        }
      )
      .map(record => ({
        chunkId: record._id.toString(),
        projectId: record.projectId.toString(),
      }))

    for await (const record of cursor) {
      batch.push(record)
      count -= 1
    }
  }
  return batch
}

/**
 * Delete a batch of old chunks from the database
 */
async function deleteOldChunks(chunkIds) {
  await mongodb.chunks.deleteMany({
    _id: { $in: chunkIds.map(id => new ObjectId(id)) },
    state: { $in: ['deleted', 'pending'] },
  })
}

/**
 * Build a chunk metadata object from the database record
 */
function chunkFromRecord(record) {
  return {
    id: record._id.toString(),
    startVersion: record.startVersion,
    endVersion: record.endVersion,
    endTimestamp: record.endTimestamp,
  }
}
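
// For reference, the metadata object built by chunkFromRecord looks like this
// (values illustrative):
//
//   {
//     id: '507f1f77bcf86cd799439011',
//     startVersion: 0,
//     endVersion: 42,
//     endTimestamp: new Date('2025-01-01T00:00:00Z'), // a Date, or null
//   }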

module.exports = {
  getLatestChunk,
  getFirstChunkBeforeTimestamp,
  getLastActiveChunkBeforeTimestamp,
  getChunkForVersion,
  getChunkForTimestamp,
  getProjectChunkIds,
  getProjectChunks,
  insertPendingChunk,
  confirmCreate,
  confirmUpdate,
  updateProjectRecord,
  deleteChunk,
  deleteProjectChunks,
  getOldChunksBatch,
  deleteOldChunks,
}
services/history-v1/storage/lib/chunk_store/postgres.js (new file, 487 lines)
@@ -0,0 +1,487 @@
// @ts-check

const { Chunk } = require('overleaf-editor-core')
const assert = require('../assert')
const knex = require('../knex')
const knexReadOnly = require('../knex_read_only')
const { ChunkVersionConflictError } = require('./errors')
const { updateProjectRecord } = require('./mongo')

const DUPLICATE_KEY_ERROR_CODE = '23505'

/**
 * @import { Knex } from 'knex'
 */

/**
 * Get the latest chunk's metadata from the database
 * @param {string} projectId
 * @param {Object} [opts]
 * @param {boolean} [opts.readOnly]
 */
async function getLatestChunk(projectId, opts = {}) {
  assert.postgresId(projectId, 'bad projectId')
  const { readOnly = false } = opts

  const record = await (readOnly ? knexReadOnly : knex)('chunks')
    .where('doc_id', parseInt(projectId, 10))
    .orderBy('end_version', 'desc')
    .first()
  if (record == null) {
    return null
  }
  return chunkFromRecord(record)
}

/**
 * Get the metadata for the chunk that contains the given version.
 *
 * @param {string} projectId
 * @param {number} version
 */
async function getChunkForVersion(projectId, version) {
  assert.postgresId(projectId, 'bad projectId')

  const record = await knex('chunks')
    .where('doc_id', parseInt(projectId, 10))
    .where('end_version', '>=', version)
    .orderBy('end_version')
    .first()
  if (!record) {
    throw new Chunk.VersionNotFoundError(projectId, version)
  }
  return chunkFromRecord(record)
}

/**
 * Get the metadata for the first chunk of the project (the one that starts at
 * version 0), provided that it ended before the given timestamp.
 *
 * @param {string} projectId
 * @param {Date} timestamp
 */
async function getFirstChunkBeforeTimestamp(projectId, timestamp) {
  assert.date(timestamp, 'bad timestamp')

  const recordActive = await getChunkForVersion(projectId, 0)

  // projectId must be valid if getChunkForVersion did not throw
  if (recordActive && recordActive.endTimestamp <= timestamp) {
    return recordActive
  }

  // fallback to deleted chunk
  const recordDeleted = await knex('old_chunks')
    .where('doc_id', parseInt(projectId, 10))
    .where('start_version', '=', 0)
    .where('end_timestamp', '<=', timestamp)
    .orderBy('end_version', 'desc')
    .first()
  if (recordDeleted) {
    return chunkFromRecord(recordDeleted)
  }
  throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
}

/**
 * Get the metadata for the chunk that contains the version that was current
 * before the given timestamp.
 *
 * @param {string} projectId
 * @param {Date} timestamp
 */
async function getLastActiveChunkBeforeTimestamp(projectId, timestamp) {
  assert.date(timestamp, 'bad timestamp')
  assert.postgresId(projectId, 'bad projectId')

  const query = knex('chunks')
    .where('doc_id', parseInt(projectId, 10))
    .where(function () {
      this.where('end_timestamp', '<=', timestamp).orWhere(
        'end_timestamp',
        null
      )
    })
    .orderBy('end_version', 'desc', 'last')

  const record = await query.first()

  if (!record) {
    throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
  }
  return chunkFromRecord(record)
}

/**
 * Get the metadata for the chunk that contains the version that was current at
 * the given timestamp.
 *
 * @param {string} projectId
 * @param {Date} timestamp
 */
async function getChunkForTimestamp(projectId, timestamp) {
  assert.postgresId(projectId, 'bad projectId')

  // This query will find the latest chunk after the timestamp (the query
  // orders in reverse chronological order), OR the latest chunk overall.
  // This accounts for the case where the timestamp is ahead of the latest
  // chunk's timestamp and would therefore not otherwise return any results.
  const whereAfterEndTimestampOrLatestChunk = knex.raw(
    'end_timestamp >= ? ' +
      'OR id = ( ' +
      'SELECT id FROM chunks ' +
      'WHERE doc_id = ? ' +
      'ORDER BY end_version desc LIMIT 1' +
      ')',
    [timestamp, parseInt(projectId, 10)]
  )

  const record = await knex('chunks')
    .where('doc_id', parseInt(projectId, 10))
    .where(whereAfterEndTimestampOrLatestChunk)
    .orderBy('end_version')
    .first()
  if (!record) {
    throw new Chunk.BeforeTimestampNotFoundError(projectId, timestamp)
  }
  return chunkFromRecord(record)
}

/**
 * Build a chunk metadata object from the database record
 */
function chunkFromRecord(record) {
  return {
    id: record.id.toString(),
    startVersion: record.start_version,
    endVersion: record.end_version,
    endTimestamp: record.end_timestamp,
  }
}

/**
 * Get all of a project's chunk ids
 *
 * @param {string} projectId
 */
async function getProjectChunkIds(projectId) {
  assert.postgresId(projectId, 'bad projectId')

  const records = await knex('chunks')
    .select('id')
    .where('doc_id', parseInt(projectId, 10))
  return records.map(record => record.id)
}

/**
 * Get all of a project's chunks directly
 *
 * @param {string} projectId
 */
async function getProjectChunks(projectId) {
  assert.postgresId(projectId, 'bad projectId')

  const records = await knex('chunks')
    .select()
    .where('doc_id', parseInt(projectId, 10))
    .orderBy('end_version')
  return records.map(chunkFromRecord)
}

/**
 * Insert a pending chunk before sending it to object storage.
 *
 * @param {string} projectId
 * @param {Chunk} chunk
 */
async function insertPendingChunk(projectId, chunk) {
  assert.postgresId(projectId, 'bad projectId')

  const result = await knex.first(
    knex.raw("nextval('chunks_id_seq'::regclass)::integer as chunkid")
  )
  const chunkId = result.chunkid
  await knex('pending_chunks').insert({
    id: chunkId,
    doc_id: parseInt(projectId, 10),
    end_version: chunk.getEndVersion(),
    start_version: chunk.getStartVersion(),
    end_timestamp: chunk.getEndTimestamp(),
  })
  return chunkId.toString()
}

/**
 * Record that a new chunk was created.
 *
 * @param {string} projectId
 * @param {Chunk} chunk
 * @param {string} chunkId
 * @param {object} opts
 * @param {Date} [opts.earliestChangeTimestamp]
 * @param {string} [opts.oldChunkId]
 */
async function confirmCreate(projectId, chunk, chunkId, opts = {}) {
  assert.postgresId(projectId, 'bad projectId')

  await knex.transaction(async tx => {
    if (opts.oldChunkId != null) {
      await _assertChunkIsNotClosed(tx, projectId, opts.oldChunkId)
      await _closeChunk(tx, projectId, opts.oldChunkId)
    }
    await Promise.all([
      _deletePendingChunk(tx, projectId, chunkId),
      _insertChunk(tx, projectId, chunk, chunkId),
    ])
    await updateProjectRecord(
      // The history id in Mongo is an integer for Postgres projects
      parseInt(projectId, 10),
      chunk,
      opts.earliestChangeTimestamp
    )
  })
}
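
// Concurrency note (summary added for clarity): _assertChunkIsNotClosed,
// defined below, takes a row lock (SELECT ... FOR UPDATE) on the old chunk,
// so two transactions extending the same chunk serialize on that row. The
// loser then fails either on the `closed` flag or on the unique constraint
// behind DUPLICATE_KEY_ERROR_CODE in _insertChunk; both paths surface as
// ChunkVersionConflictError.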

/**
 * Record that a chunk was replaced by a new one.
 *
 * @param {string} projectId
 * @param {string} oldChunkId
 * @param {Chunk} newChunk
 * @param {string} newChunkId
 * @param {object} [opts]
 * @param {Date} [opts.earliestChangeTimestamp]
 */
async function confirmUpdate(
  projectId,
  oldChunkId,
  newChunk,
  newChunkId,
  opts = {}
) {
  assert.postgresId(projectId, 'bad projectId')

  await knex.transaction(async tx => {
    await _assertChunkIsNotClosed(tx, projectId, oldChunkId)
    await _deleteChunks(tx, { doc_id: projectId, id: oldChunkId })
    await Promise.all([
      _deletePendingChunk(tx, projectId, newChunkId),
      _insertChunk(tx, projectId, newChunk, newChunkId),
    ])
    await updateProjectRecord(
      // The history id in Mongo is an integer for Postgres projects
      parseInt(projectId, 10),
      newChunk,
      opts.earliestChangeTimestamp
    )
  })
}

/**
 * Delete a pending chunk
 *
 * @param {Knex} tx
 * @param {string} projectId
 * @param {string} chunkId
 */
async function _deletePendingChunk(tx, projectId, chunkId) {
  await tx('pending_chunks')
    .where({
      doc_id: parseInt(projectId, 10),
      id: parseInt(chunkId, 10),
    })
    .del()
}

/**
 * Adds an active chunk
 *
 * @param {Knex} tx
 * @param {string} projectId
 * @param {Chunk} chunk
 * @param {string} chunkId
 */
async function _insertChunk(tx, projectId, chunk, chunkId) {
  const startVersion = chunk.getStartVersion()
  const endVersion = chunk.getEndVersion()
  try {
    await tx('chunks').insert({
      id: parseInt(chunkId, 10),
      doc_id: parseInt(projectId, 10),
      start_version: startVersion,
      end_version: endVersion,
      end_timestamp: chunk.getEndTimestamp(),
    })
  } catch (err) {
    if (
      err instanceof Error &&
      'code' in err &&
      err.code === DUPLICATE_KEY_ERROR_CODE
    ) {
      throw new ChunkVersionConflictError(
        'chunk start or end version is not unique',
        { projectId, chunkId, startVersion, endVersion }
      )
    }
    throw err
  }
}

/**
 * Check that a chunk is not closed
 *
 * This is used to synchronize chunk creations and extensions.
 *
 * @param {Knex} tx
 * @param {string} projectId
 * @param {string} chunkId
 */
async function _assertChunkIsNotClosed(tx, projectId, chunkId) {
  const record = await tx('chunks')
    .forUpdate()
    .select('closed')
    .where('doc_id', parseInt(projectId, 10))
    .where('id', parseInt(chunkId, 10))
    .first()
  if (!record) {
    throw new ChunkVersionConflictError('unable to close chunk: not found', {
      projectId,
      chunkId,
    })
  }
  if (record.closed) {
    throw new ChunkVersionConflictError(
      'unable to close chunk: already closed',
      {
        projectId,
        chunkId,
      }
    )
  }
}

/**
 * Close a chunk
 *
 * A closed chunk can no longer be extended.
 *
 * @param {Knex} tx
 * @param {string} projectId
 * @param {string} chunkId
 */
async function _closeChunk(tx, projectId, chunkId) {
  await tx('chunks')
    .update({ closed: true })
    .where('doc_id', parseInt(projectId, 10))
    .where('id', parseInt(chunkId, 10))
}

/**
 * Delete a chunk.
 *
 * @param {string} projectId
 * @param {string} chunkId
 */
async function deleteChunk(projectId, chunkId) {
  assert.postgresId(projectId, 'bad projectId')
  assert.integer(chunkId, 'bad chunkId')

  await _deleteChunks(knex, {
    doc_id: parseInt(projectId, 10),
    id: parseInt(chunkId, 10),
  })
}

/**
 * Delete all of a project's chunks
 *
 * @param {string} projectId
 */
async function deleteProjectChunks(projectId) {
  assert.postgresId(projectId, 'bad projectId')

  await knex.transaction(async tx => {
    await _deleteChunks(tx, { doc_id: parseInt(projectId, 10) })
  })
}

/**
 * Delete many chunks
 *
 * @param {Knex} tx
 * @param {any} whereClause
 */
async function _deleteChunks(tx, whereClause) {
  const rows = await tx('chunks').where(whereClause).del().returning('*')
  if (rows.length === 0) {
    return
  }

  const oldChunks = rows.map(row => ({
    doc_id: row.doc_id,
    chunk_id: row.id,
    start_version: row.start_version,
    end_version: row.end_version,
    end_timestamp: row.end_timestamp,
    deleted_at: tx.fn.now(),
  }))
  await tx('old_chunks').insert(oldChunks)
}

/**
 * Get a batch of old chunks for deletion
 *
 * @param {number} count
 * @param {number} minAgeSecs
 */
async function getOldChunksBatch(count, minAgeSecs) {
  const maxDeletedAt = new Date(Date.now() - minAgeSecs * 1000)
  const records = await knex('old_chunks')
    .whereNull('deleted_at')
    .orWhere('deleted_at', '<', maxDeletedAt)
    .orderBy('chunk_id')
    .limit(count)
  return records.map(oldChunk => ({
    projectId: oldChunk.doc_id.toString(),
    chunkId: oldChunk.chunk_id.toString(),
  }))
}

/**
 * Delete a batch of old chunks from the database
 *
 * @param {string[]} chunkIds
 */
async function deleteOldChunks(chunkIds) {
  await knex('old_chunks')
    .whereIn(
      'chunk_id',
      chunkIds.map(id => parseInt(id, 10))
    )
    .del()
}

/**
 * Generate a new project id
 */
async function generateProjectId() {
  const record = await knex.first(
    knex.raw("nextval('docs_id_seq'::regclass)::integer as doc_id")
  )
  return record.doc_id.toString()
}

module.exports = {
  getLatestChunk,
  getFirstChunkBeforeTimestamp,
  getLastActiveChunkBeforeTimestamp,
  getChunkForVersion,
  getChunkForTimestamp,
  getProjectChunkIds,
  getProjectChunks,
  insertPendingChunk,
  confirmCreate,
  confirmUpdate,
  deleteChunk,
  deleteProjectChunks,
  getOldChunksBatch,
  deleteOldChunks,
  generateProjectId,
}
services/history-v1/storage/lib/chunk_store/redis.js (new file, 254 lines)
@@ -0,0 +1,254 @@
const metrics = require('@overleaf/metrics')
const logger = require('@overleaf/logger')
const redis = require('../redis')
const rclient = redis.rclientHistory
const { Snapshot, Change, History, Chunk } = require('overleaf-editor-core')

const TEMPORARY_CACHE_LIFETIME = 300 // 5 minutes

const keySchema = {
  snapshot({ projectId }) {
    return `snapshot:{${projectId}}`
  },
  startVersion({ projectId }) {
    return `snapshot-version:{${projectId}}`
  },
  changes({ projectId }) {
    return `changes:{${projectId}}`
  },
}
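
// Note on the key shapes above: the braces around the project id form a Redis
// Cluster hash tag, so all three keys for a project hash to the same slot.
// That is what makes the multi-key Lua scripts below valid under cluster mode.
// Example keys for an illustrative project id:
//
//   snapshot:{507f1f77bcf86cd799439011}
//   snapshot-version:{507f1f77bcf86cd799439011}
//   changes:{507f1f77bcf86cd799439011}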

rclient.defineCommand('get_current_chunk', {
  numberOfKeys: 3,
  lua: `
    local startVersionValue = redis.call('GET', KEYS[2])
    if not startVersionValue then
      return nil -- this is a cache-miss
    end
    local snapshotValue = redis.call('GET', KEYS[1])
    local changesValues = redis.call('LRANGE', KEYS[3], 0, -1)
    return {snapshotValue, startVersionValue, changesValues}
  `,
})
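
// Reading the start version, the snapshot and the changes list in one Lua
// script keeps the three reads atomic: Redis runs scripts without
// interleaving other commands, so a concurrent setCurrentChunk cannot leave
// the reader with a snapshot from one chunk and changes from another.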

/**
 * Retrieves the current chunk of project history from Redis storage
 * @param {string} projectId - The unique identifier of the project
 * @returns {Promise<Chunk|null>} A Promise that resolves to a Chunk object containing project history,
 *                                or null if retrieval fails
 * @throws {Error} If Redis operations fail
 */
async function getCurrentChunk(projectId) {
  try {
    const result = await rclient.get_current_chunk(
      keySchema.snapshot({ projectId }),
      keySchema.startVersion({ projectId }),
      keySchema.changes({ projectId })
    )
    if (!result) {
      return null // cache-miss
    }
    const snapshot = Snapshot.fromRaw(JSON.parse(result[0]))
    const startVersion = JSON.parse(result[1])
    const changes = result[2].map(c => Change.fromRaw(JSON.parse(c)))
    const history = new History(snapshot, changes)
    const chunk = new Chunk(history, startVersion)
    metrics.inc('chunk_store.redis.get_current_chunk', 1, { status: 'success' })
    return chunk
  } catch (err) {
    logger.error({ err, projectId }, 'error getting current chunk from redis')
    metrics.inc('chunk_store.redis.get_current_chunk', 1, { status: 'error' })
    return null
  }
}

rclient.defineCommand('get_current_chunk_metadata', {
  numberOfKeys: 2,
  lua: `
    local startVersionValue = redis.call('GET', KEYS[1])
    local changesCount = redis.call('LLEN', KEYS[2])
    return {startVersionValue, changesCount}
  `,
})

/**
 * Retrieves the current chunk metadata for a given project from Redis
 * @param {string} projectId - The ID of the project to get metadata for
 * @returns {Promise<Object|null>} Object containing startVersion and changesCount if found, null on error or cache miss
 * @property {number} startVersion - The starting version information
 * @property {number} changesCount - The number of changes in the chunk
 */
async function getCurrentChunkMetadata(projectId) {
  try {
    const result = await rclient.get_current_chunk_metadata(
      keySchema.startVersion({ projectId }),
      keySchema.changes({ projectId })
    )
    if (!result) {
      return null // cache-miss
    }
    const startVersion = JSON.parse(result[0])
    const changesCount = parseInt(result[1], 10)
    return { startVersion, changesCount }
  } catch (err) {
    return null
  }
}

rclient.defineCommand('set_current_chunk', {
  numberOfKeys: 3,
  lua: `
    local snapshotValue = ARGV[1]
    local startVersionValue = ARGV[2]
    redis.call('SETEX', KEYS[1], ${TEMPORARY_CACHE_LIFETIME}, snapshotValue)
    redis.call('SETEX', KEYS[2], ${TEMPORARY_CACHE_LIFETIME}, startVersionValue)
    redis.call('DEL', KEYS[3]) -- clear the old changes list
    if #ARGV >= 3 then
      redis.call('RPUSH', KEYS[3], unpack(ARGV, 3))
      redis.call('EXPIRE', KEYS[3], ${TEMPORARY_CACHE_LIFETIME})
    end
  `,
})

/**
 * Stores the current chunk of project history in Redis
 * @param {string} projectId - The ID of the project
 * @param {Chunk} chunk - The chunk object containing history data
 * @returns {Promise<*>} Returns the result of the Redis operation, or null if an error occurs
 * @throws {Error} May throw Redis-related errors which are caught internally
 */
async function setCurrentChunk(projectId, chunk) {
  try {
    const snapshotKey = keySchema.snapshot({ projectId })
    const startVersionKey = keySchema.startVersion({ projectId })
    const changesKey = keySchema.changes({ projectId })

    const snapshot = chunk.history.snapshot
    const startVersion = chunk.startVersion
    const changes = chunk.history.changes

    await rclient.set_current_chunk(
      snapshotKey,
      startVersionKey,
      changesKey,
      JSON.stringify(snapshot.toRaw()),
      startVersion,
      ...changes.map(c => JSON.stringify(c.toRaw()))
    )
    metrics.inc('chunk_store.redis.set_current_chunk', 1, { status: 'success' })
  } catch (err) {
    logger.error(
      { err, projectId, chunk },
      'error setting current chunk in redis'
    )
    metrics.inc('chunk_store.redis.set_current_chunk', 1, { status: 'error' })
    return null // while testing we will suppress any errors
  }
}

/**
 * Checks whether a cached chunk's version metadata matches the current chunk's metadata
 * @param {Chunk} cachedChunk - The chunk retrieved from cache
 * @param {Chunk} currentChunk - The current chunk to compare against
 * @returns {boolean} - Returns true if the chunks have matching start and end versions, false otherwise
 */
function checkCacheValidity(cachedChunk, currentChunk) {
  return Boolean(
    cachedChunk &&
      cachedChunk.getStartVersion() === currentChunk.getStartVersion() &&
      cachedChunk.getEndVersion() === currentChunk.getEndVersion()
  )
}

/**
 * Validates if a cached chunk matches the current chunk metadata by comparing versions
 * @param {Object} cachedChunk - The cached chunk object to validate
 * @param {Object} currentChunkMetadata - The current chunk metadata to compare against
 * @param {number} currentChunkMetadata.startVersion - The starting version number
 * @param {number} currentChunkMetadata.endVersion - The ending version number
 * @returns {boolean} - True if the cached chunk is valid, false otherwise
 */
function checkCacheValidityWithMetadata(cachedChunk, currentChunkMetadata) {
  return Boolean(
    cachedChunk &&
      cachedChunk.getStartVersion() === currentChunkMetadata.startVersion &&
      cachedChunk.getEndVersion() === currentChunkMetadata.endVersion
  )
}

/**
 * Compares two chunks for equality using stringified JSON comparison
 * @param {string} projectId - The ID of the project
 * @param {Chunk} cachedChunk - The cached chunk to compare
 * @param {Chunk} currentChunk - The current chunk to compare against
 * @returns {boolean} - Returns false if either chunk is null/undefined, otherwise returns the comparison result
 */
function compareChunks(projectId, cachedChunk, currentChunk) {
  if (!cachedChunk || !currentChunk) {
    return false
  }
  const identical = JSON.stringify(cachedChunk) === JSON.stringify(currentChunk)
  if (!identical) {
    try {
      logger.error(
        {
          projectId,
          cachedChunkStartVersion: cachedChunk.getStartVersion(),
          cachedChunkEndVersion: cachedChunk.getEndVersion(),
          currentChunkStartVersion: currentChunk.getStartVersion(),
          currentChunkEndVersion: currentChunk.getEndVersion(),
        },
        'chunk cache mismatch'
      )
    } catch (err) {
      // ignore errors while logging
    }
  }
  metrics.inc('chunk_store.redis.compare_chunks', 1, {
    status: identical ? 'success' : 'fail',
  })
  return identical
}

// Define Lua script for atomic cache clearing
rclient.defineCommand('clear_chunk_cache', {
  numberOfKeys: 3,
  lua: `
    -- Delete all keys related to a project's chunk cache atomically
    redis.call('DEL', KEYS[1]) -- snapshot key
    redis.call('DEL', KEYS[2]) -- startVersion key
    redis.call('DEL', KEYS[3]) -- changes key
    return 1
  `,
})

/**
 * Clears all cache entries for a project's chunk data
 * @param {string} projectId - The ID of the project whose cache should be cleared
 * @returns {Promise<boolean>} A promise that resolves to true if successful, false on error
 */
async function clearCache(projectId) {
  try {
    const snapshotKey = keySchema.snapshot({ projectId })
    const startVersionKey = keySchema.startVersion({ projectId })
    const changesKey = keySchema.changes({ projectId })

    await rclient.clear_chunk_cache(snapshotKey, startVersionKey, changesKey)
    metrics.inc('chunk_store.redis.clear_cache', 1, { status: 'success' })
    return true
  } catch (err) {
    logger.error({ err, projectId }, 'error clearing chunk cache from redis')
    metrics.inc('chunk_store.redis.clear_cache', 1, { status: 'error' })
    return false
  }
}

module.exports = {
  getCurrentChunk,
  setCurrentChunk,
  getCurrentChunkMetadata,
  checkCacheValidity,
  checkCacheValidityWithMetadata,
  compareChunks,
  clearCache,
}
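// Illustrative read-through pattern (a sketch, not part of this commit):
// combining this cache with the main chunk store in ./index.js. Error
// handling is elided; getCurrentChunk already returns null on miss or error.
// eslint-disable-next-line no-unused-vars
async function exampleLoadLatestWithCache(chunkStore, projectId) {
  const cached = await getCurrentChunk(projectId)
  if (cached) {
    return cached
  }
  const chunk = await chunkStore.loadLatest(projectId) // e.g. require('./index')
  await setCurrentChunk(projectId, chunk) // best effort; errors are swallowed
  return chunk
}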