first commit

services/history-v1/storage/tasks/backfill_start_version.js (new file, 109 lines)
@@ -0,0 +1,109 @@
const commandLineArgs = require('command-line-args')
const BPromise = require('bluebird')
const timersPromises = require('node:timers/promises')

const { knex, historyStore } = require('..')

const MAX_POSTGRES_INTEGER = 2147483647
const DEFAULT_BATCH_SIZE = 1000
const DEFAULT_CONCURRENCY = 1
const MAX_RETRIES = 10
const RETRY_DELAY_MS = 5000

async function main() {
  const options = parseOptions()
  let batchStart = options.minId
  while (batchStart <= options.maxId) {
    const chunks = await getChunks(batchStart, options.maxId, options.batchSize)
    if (chunks.length === 0) {
      // No results. We're done.
      break
    }
    const batchEnd = chunks[chunks.length - 1].id
    await processBatch(chunks, options)
    console.log(`Processed chunks ${batchStart} to ${batchEnd}`)
    batchStart = batchEnd + 1
  }
}

function parseOptions() {
  const args = commandLineArgs([
    { name: 'min-id', type: Number, defaultValue: 1 },
    {
      name: 'max-id',
      type: Number,
      defaultValue: MAX_POSTGRES_INTEGER,
    },
    { name: 'batch-size', type: Number, defaultValue: DEFAULT_BATCH_SIZE },
    { name: 'concurrency', type: Number, defaultValue: DEFAULT_CONCURRENCY },
  ])
  return {
    minId: args['min-id'],
    maxId: args['max-id'],
    batchSize: args['batch-size'],
    concurrency: args.concurrency,
  }
}

async function getChunks(minId, maxId, batchSize) {
  const chunks = await knex('chunks')
    .where('id', '>=', minId)
    .andWhere('id', '<=', maxId)
    .orderBy('id')
    .limit(batchSize)
  return chunks
}

async function processBatch(chunks, options) {
  let retries = 0
  while (true) {
    const results = await BPromise.map(chunks, processChunk, {
      concurrency: options.concurrency,
    })
    const failedChunks = results
      .filter(result => !result.success)
      .map(result => result.chunk)
    if (failedChunks.length === 0) {
      // All chunks processed. Carry on.
      break
    }

    // Some chunks failed. Retry.
    retries += 1
    if (retries > MAX_RETRIES) {
      console.log('Too many retries processing chunks. Giving up.')
      process.exit(1)
    }
    console.log(
      `Retrying chunks: ${failedChunks.map(chunk => chunk.id).join(', ')}`
    )
    await timersPromises.setTimeout(RETRY_DELAY_MS)
    chunks = failedChunks
  }
}

async function processChunk(chunk) {
  try {
    const rawHistory = await historyStore.loadRaw(
      chunk.doc_id.toString(),
      chunk.id
    )
    const startVersion = chunk.end_version - rawHistory.changes.length
    await knex('chunks')
      .where('id', chunk.id)
      .update({ start_version: startVersion })
    return { chunk, success: true }
  } catch (err) {
    console.error(`Failed to process chunk ${chunk.id}:`, err.stack)
    return { chunk, success: false }
  }
}

main()
  .then(() => {
    process.exit()
  })
  .catch(err => {
    console.error(err)
    process.exit(1)
  })
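
Backfilling is driven entirely by the command-line options parsed in parseOptions(). A hypothetical invocation follows; the flag values are illustrative and the path assumes the repository root as working directory:

    node services/history-v1/storage/tasks/backfill_start_version.js \
      --min-id 1 --max-id 5000000 --batch-size 1000 --concurrency 4

All four flags are optional and fall back to the defaults defined at the top of the script (1, MAX_POSTGRES_INTEGER, 1000 and 1 respectively).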

services/history-v1/storage/tasks/compress_changes.js (new file, 107 lines)
@@ -0,0 +1,107 @@
/**
 * Compress changes for projects that have too many text operations.
 *
 * Usage:
 *
 *   node tasks/compress_changes.js CSV_FILE
 *
 * where CSV_FILE contains a list of project ids in the first column
 */

const fs = require('node:fs')
const BPromise = require('bluebird')
const { History } = require('overleaf-editor-core')
const { historyStore, chunkStore } = require('..')

const CONCURRENCY = 10

async function main() {
  const filename = process.argv[2]
  const projectIds = await readCsv(filename)
  const chunks = []
  for (const projectId of projectIds) {
    const chunkIds = await chunkStore.getProjectChunkIds(projectId)
    chunks.push(...chunkIds.map(id => ({ id, projectId })))
  }
  let totalCompressed = 0
  await BPromise.map(
    chunks,
    async chunk => {
      try {
        const history = await getHistory(chunk)
        const numCompressed = compressChanges(history)
        if (numCompressed > 0) {
          await storeHistory(chunk, history)
          console.log(
            `Compressed project ${chunk.projectId}, chunk ${chunk.id}`
          )
        }
        totalCompressed += numCompressed
      } catch (err) {
        console.log(err)
      }
    },
    { concurrency: CONCURRENCY }
  )
  console.log('CHANGES:', totalCompressed)
}

async function readCsv(filename) {
  const csv = await fs.promises.readFile(filename, 'utf-8')
  const lines = csv.trim().split('\n')
  const projectIds = lines.map(line => line.split(',')[0])
  return projectIds
}

async function getHistory(chunk) {
  const rawHistory = await historyStore.loadRaw(chunk.projectId, chunk.id)
  const history = History.fromRaw(rawHistory)
  return history
}

async function storeHistory(chunk, history) {
  const rawHistory = history.toRaw()
  await historyStore.storeRaw(chunk.projectId, chunk.id, rawHistory)
}

function compressChanges(history) {
  let numCompressed = 0
  for (const change of history.getChanges()) {
    const newOperations = compressOperations(change.operations)
    if (newOperations.length !== change.operations.length) {
      numCompressed++
    }
    change.setOperations(newOperations)
  }
  return numCompressed
}

function compressOperations(operations) {
  if (!operations.length) return []

  const newOperations = []
  let currentOperation = operations[0]
  for (let operationId = 1; operationId < operations.length; operationId++) {
    const nextOperation = operations[operationId]
    if (currentOperation.canBeComposedWith(nextOperation)) {
      currentOperation = currentOperation.compose(nextOperation)
    } else {
      // currentOperation and nextOperation cannot be composed. Push the
      // currentOperation and start over with nextOperation.
      newOperations.push(currentOperation)
      currentOperation = nextOperation
    }
  }
  newOperations.push(currentOperation)

  return newOperations
}

main()
  .then(() => {
    process.exit()
  })
  .catch(err => {
    console.error(err)
    process.exit(1)
  })
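
readCsv() only looks at the first comma-separated field of each line, so extra columns are ignored. A hypothetical input file (project ids are illustrative):

    1234,extra-column-ignored
    5678
    91011

and the corresponding invocation, assuming the repository root as working directory (the usage string inside the file assumes the storage directory instead):

    node services/history-v1/storage/tasks/compress_changes.js projects.csv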

services/history-v1/storage/tasks/copy_project_blobs.js (new executable file, 294 lines)
@@ -0,0 +1,294 @@
#!/usr/bin/env node

const { promisify } = require('node:util')
const BPromise = require('bluebird')
const commandLineArgs = require('command-line-args')
const config = require('config')
const fs = require('node:fs')
const readline = require('node:readline')
const { History } = require('overleaf-editor-core')
const { knex, historyStore, persistor } = require('..')
const projectKey = require('../lib/project_key')

const MAX_POSTGRES_INTEGER = 2147483647
const DEFAULT_BATCH_SIZE = 1000
const MAX_RETRIES = 10
const RETRY_DELAY_MS = 5000

// Obtain a preconfigured GCS client through a non-documented property of
// object-persistor. Sorry about that. We need the GCS client because we use
// operations that are not implemented in object-persistor.
const gcsClient = persistor.storage
const globalBucket = gcsClient.bucket(config.get('blobStore.globalBucket'))
const projectBucket = gcsClient.bucket(config.get('blobStore.projectBucket'))
const delay = promisify(setTimeout)

async function main() {
  const options = commandLineArgs([
    { name: 'global-blobs', type: String },
    { name: 'min-project-id', type: Number, defaultValue: 1 },
    {
      name: 'max-project-id',
      type: Number,
      defaultValue: MAX_POSTGRES_INTEGER,
    },
    { name: 'batch-size', type: Number, defaultValue: DEFAULT_BATCH_SIZE },
    { name: 'concurrency', type: Number, defaultValue: 1 },
  ])
  if (!options['global-blobs']) {
    console.error(
      'You must specify a global blobs file with the --global-blobs option'
    )
    process.exit(1)
  }
  const globalBlobs = await readGlobalBlobs(options['global-blobs'])
  const minProjectId = options['min-project-id']
  const maxProjectId = options['max-project-id']
  const batchSize = options['batch-size']
  const concurrency = options.concurrency
  console.log(`Keeping ${globalBlobs.size} global blobs`)
  await run({ globalBlobs, minProjectId, maxProjectId, batchSize, concurrency })
  console.log('Done.')
}

async function readGlobalBlobs(filename) {
  const stream = fs.createReadStream(filename)
  const reader = readline.createInterface({
    input: stream,
    crlfDelay: Infinity,
  })
  const blobs = new Set()
  for await (const line of reader) {
    blobs.add(line.trim())
  }
  return blobs
}

async function run(options) {
  const { globalBlobs, minProjectId, maxProjectId, batchSize, concurrency } =
    options
  let batchStart = minProjectId
  while (batchStart <= maxProjectId) {
    let projectIds = await getProjectIds(batchStart, maxProjectId, batchSize)
    if (projectIds.length === 0) {
      break
    }
    const batchEnd = projectIds[projectIds.length - 1]
    console.log(`Processing projects ${batchStart} to ${batchEnd}`)
    const chunkIdsByProject = await getChunkIdsByProject(projectIds)

    let retries = 0
    while (true) {
      const results = await BPromise.map(
        projectIds,
        async projectId =>
          await processProject(
            projectId,
            chunkIdsByProject.get(projectId),
            globalBlobs
          ),
        { concurrency }
      )
      const failedProjectIds = results
        .filter(result => !result.success)
        .map(result => result.projectId)
      if (failedProjectIds.length === 0) {
        // All projects were copied successfully. Carry on.
        break
      }

      // Some projects failed. Retry.
      retries += 1
      if (retries > MAX_RETRIES) {
        console.log(
          `Too many retries processing projects ${batchStart} to ${batchEnd}. Giving up.`
        )
        process.exit(1)
      }
      console.log(`Retrying projects: ${failedProjectIds.join(', ')}`)
      await delay(RETRY_DELAY_MS)
      projectIds = failedProjectIds
    }

    // Set up next batch
    batchStart = batchEnd + 1
  }
}

async function getProjectIds(minProjectId, maxProjectId, batchSize) {
  const projectIds = await knex('chunks')
    .distinct('doc_id')
    .where('doc_id', '>=', minProjectId)
    .andWhere('doc_id', '<=', maxProjectId)
    .orderBy('doc_id')
    .limit(batchSize)
    .pluck('doc_id')
  return projectIds
}

async function getChunkIdsByProject(projectIds) {
  const chunks = await knex('chunks')
    .select('id', { projectId: 'doc_id' })
    .where('doc_id', 'in', projectIds)
  const chunkIdsByProject = new Map()
  for (const projectId of projectIds) {
    chunkIdsByProject.set(projectId, [])
  }
  for (const chunk of chunks) {
    chunkIdsByProject.get(chunk.projectId).push(chunk.id)
  }
  return chunkIdsByProject
}

async function processProject(projectId, chunkIds, globalBlobs) {
  try {
    const blobHashes = await getBlobHashes(projectId, chunkIds)
    const projectBlobHashes = blobHashes.filter(hash => !globalBlobs.has(hash))
    const gcsSizesByHash = new Map()
    for (const blobHash of projectBlobHashes) {
      const blobSize = await copyBlobInGcs(projectId, blobHash)
      if (blobSize != null) {
        gcsSizesByHash.set(blobHash, blobSize)
      }
    }
    const dbSizesByHash = await copyBlobsInDatabase(
      projectId,
      projectBlobHashes
    )
    compareBlobSizes(gcsSizesByHash, dbSizesByHash)
    return { projectId, success: true }
  } catch (err) {
    console.error(`Failed to process project ${projectId}:`, err.stack)
    return { projectId, success: false }
  }
}

function compareBlobSizes(gcsSizesByHash, dbSizesByHash) {
  // Throw an error if the database doesn't report as many blobs as GCS
  if (dbSizesByHash.size !== gcsSizesByHash.size) {
    throw new Error(
      `the database reported ${dbSizesByHash.size} blobs copied, but GCS reported ${gcsSizesByHash.size} blobs copied`
    )
  }

  const mismatches = []
  for (const [hash, dbSize] of dbSizesByHash.entries()) {
    if (gcsSizesByHash.get(hash) !== dbSize) {
      mismatches.push(hash)
    }
  }
  if (mismatches.length > 0) {
    throw new Error(`blob size mismatch for hashes: ${mismatches.join(', ')}`)
  }
}

async function getHistory(projectId, chunkId) {
  const rawHistory = await historyStore.loadRaw(projectId, chunkId)
  const history = History.fromRaw(rawHistory)
  return history
}

async function getBlobHashes(projectId, chunkIds) {
  const blobHashes = new Set()
  for (const chunkId of chunkIds) {
    const history = await getHistory(projectId, chunkId)
    history.findBlobHashes(blobHashes)
  }
  return Array.from(blobHashes)
}

async function copyBlobInGcs(projectId, blobHash) {
  const globalBlobKey = [
    blobHash.slice(0, 2),
    blobHash.slice(2, 4),
    blobHash.slice(4),
  ].join('/')
  const projectBlobKey = [
    projectKey.format(projectId),
    blobHash.slice(0, 2),
    blobHash.slice(2),
  ].join('/')
  const globalBlobObject = globalBucket.file(globalBlobKey)
  const projectBlobObject = projectBucket.file(projectBlobKey)

  // Check if the project blob exists
  let projectBlobMetadata = null
  try {
    ;[projectBlobMetadata] = await projectBlobObject.getMetadata()
  } catch (err) {
    if (err.code !== 404) {
      throw err
    }
  }

  // Check if the global blob exists
  let globalBlobMetadata = null
  try {
    ;[globalBlobMetadata] = await globalBlobObject.getMetadata()
  } catch (err) {
    if (err.code !== 404) {
      throw err
    }
  }

  if (projectBlobMetadata) {
    // Project blob already exists. Compare the metadata if the global blob
    // also exists and return early.
    if (
      globalBlobMetadata != null &&
      (globalBlobMetadata.size !== projectBlobMetadata.size ||
        globalBlobMetadata.md5Hash !== projectBlobMetadata.md5Hash)
    ) {
      throw new Error(
        `Project blob ${blobHash} in project ${projectId} doesn't match global blob`
      )
    }
    return null
  }

  await globalBlobObject.copy(projectBlobObject)

  // Paranoid check that the copy went well. The getMetadata() method returns
  // an array, with the metadata in first position.
  ;[projectBlobMetadata] = await projectBlobObject.getMetadata()
  if (
    globalBlobMetadata.size !== projectBlobMetadata.size ||
    globalBlobMetadata.md5Hash !== projectBlobMetadata.md5Hash
  ) {
    throw new Error(`Failed to copy blob ${blobHash} to project ${projectId}`)
  }

  return parseInt(projectBlobMetadata.size, 10)
}

async function copyBlobsInDatabase(projectId, blobHashes) {
  const blobSizesByHash = new Map()
  if (blobHashes.length === 0) {
    return blobSizesByHash
  }
  const binaryBlobHashes = blobHashes.map(hash => Buffer.from(hash, 'hex'))
  const result = await knex.raw(
    `INSERT INTO project_blobs (
       project_id, hash_bytes, byte_length, string_length
     )
     SELECT ?, hash_bytes, byte_length, string_length
     FROM blobs
     WHERE hash_bytes IN (${binaryBlobHashes.map(_ => '?').join(',')})
     ON CONFLICT (project_id, hash_bytes) DO NOTHING
     RETURNING hash_bytes, byte_length`,
    [projectId, ...binaryBlobHashes]
  )
  for (const row of result.rows) {
    blobSizesByHash.set(row.hash_bytes.toString('hex'), row.byte_length)
  }
  return blobSizesByHash
}

main()
  .then(() => {
    process.exit()
  })
  .catch(err => {
    console.error(err)
    process.exit(1)
  })
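
The --global-blobs option is required and names a plain-text file with one blob hash per line, which is what readGlobalBlobs() expects; the other flags mirror the backfill script above. A hypothetical invocation, with illustrative values and the repository root as working directory:

    node services/history-v1/storage/tasks/copy_project_blobs.js \
      --global-blobs global_blob_hashes.txt \
      --min-project-id 1 --max-project-id 1000000 \
      --batch-size 1000 --concurrency 4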

services/history-v1/storage/tasks/delete_old_chunks.js (new file, 36 lines)
@@ -0,0 +1,36 @@
#!/usr/bin/env node

'use strict'

const commandLineArgs = require('command-line-args')
const { chunkStore } = require('../')

async function deleteOldChunks(options) {
  const deletedChunksTotal = await chunkStore.deleteOldChunks(options)
  console.log(`Deleted ${deletedChunksTotal} old chunks`)
}

exports.deleteOldChunks = deleteOldChunks

if (require.main === module) {
  const options = commandLineArgs([
    { name: 'batch-size', type: Number },
    { name: 'max-batches', type: Number },
    { name: 'min-age', type: Number },
    { name: 'timeout', type: Number },
    { name: 'verbose', type: Boolean, alias: 'v', defaultValue: false },
  ])
  deleteOldChunks({
    batchSize: options['batch-size'],
    maxBatches: options['max-batches'],
    timeout: options.timeout,
    minAgeSecs: options['min-age'],
  })
    .then(() => {
      process.exit()
    })
    .catch(err => {
      console.error(err)
      process.exit(1)
    })
}
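
When run directly, the script forwards the parsed options to chunkStore.deleteOldChunks(). A hypothetical invocation with illustrative values (--min-age maps to minAgeSecs, so it is in seconds; the unit of --timeout is whatever chunkStore.deleteOldChunks() expects):

    node services/history-v1/storage/tasks/delete_old_chunks.js \
      --batch-size 1000 --max-batches 10 --min-age 86400 --timeout 60000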

services/history-v1/storage/tasks/fix_duplicate_versions.js (new executable file, 156 lines)
@@ -0,0 +1,156 @@
#!/usr/bin/env node

'use strict'

const commandLineArgs = require('command-line-args')
const { chunkStore } = require('..')

main()
  .then(() => {
    process.exit(0)
  })
  .catch(err => {
    console.error(err)
    process.exit(1)
  })

async function main() {
  const opts = commandLineArgs([
    { name: 'project-ids', type: String, multiple: true, defaultOption: true },
    { name: 'save', type: Boolean, defaultValue: false },
    { name: 'help', type: Boolean, defaultValue: false },
  ])
  if (opts.help || opts['project-ids'] == null) {
    console.log('Usage: fix_duplicate_versions [--save] PROJECT_ID...')
    process.exit()
  }
  for (const projectId of opts['project-ids']) {
    await processProject(projectId, opts.save)
  }
  if (!opts.save) {
    console.log('\nThis was a dry run. Re-run with --save to persist changes.')
  }
}

async function processProject(projectId, save) {
  console.log(`Project ${projectId}:`)
  const chunk = await chunkStore.loadLatest(projectId)
  let numChanges = 0
  numChanges += removeDuplicateProjectVersions(chunk)
  numChanges += removeDuplicateDocVersions(chunk)
  console.log(`  ${numChanges > 0 ? numChanges : 'no'} changes`)
  if (save && numChanges > 0) {
    await replaceChunk(projectId, chunk)
  }
}

function removeDuplicateProjectVersions(chunk) {
  let numChanges = 0
  let lastVersion = null
  const { snapshot, changes } = chunk.history
  if (snapshot.projectVersion != null) {
    lastVersion = snapshot.projectVersion
  }
  for (const change of changes) {
    if (change.projectVersion == null) {
      // Not a project structure change. Ignore.
      continue
    }
    if (
      lastVersion != null &&
      !areProjectVersionsIncreasing(lastVersion, change.projectVersion)
    ) {
      // Duplicate. Remove all ops.
      console.log(
        `  Removing out-of-order project structure change: ${change.projectVersion} <= ${lastVersion}`
      )
      change.setOperations([])
      delete change.projectVersion
      numChanges++
    } else {
      lastVersion = change.projectVersion
    }
  }

  return numChanges
}

function removeDuplicateDocVersions(chunk) {
  let numChanges = 0
  // Last known version for each doc, keyed by doc id (the keys of
  // v2DocVersions.data)
  const lastVersions = new Map()
  const { snapshot, changes } = chunk.history
  if (snapshot.v2DocVersions != null) {
    for (const [docId, { v }] of Object.entries(snapshot.v2DocVersions.data)) {
      lastVersions.set(docId, v)
    }
  }
  for (const change of changes) {
    if (change.v2DocVersions == null) {
      continue
    }

    // Collect all docs that have problematic versions
    const badPaths = []
    const badDocIds = []
    for (const [docId, { pathname, v }] of Object.entries(
      change.v2DocVersions.data
    )) {
      const lastVersion = lastVersions.get(docId)
      if (lastVersion != null && v <= lastVersion) {
        // Duplicate. Remove ops related to that doc.
        console.log(
          `  Removing out-of-order change for doc ${docId} (${pathname}): ${v} <= ${lastVersion}`
        )
        badPaths.push(pathname)
        badDocIds.push(docId)
        numChanges++
      } else {
        lastVersions.set(docId, v)
      }
    }

    // Remove bad operations
    if (badPaths.length > 0) {
      change.setOperations(
        change.operations.filter(
          op => op.pathname == null || !badPaths.includes(op.pathname)
        )
      )
    }

    // Remove bad v2 doc versions
    for (const docId of badDocIds) {
      delete change.v2DocVersions.data[docId]
    }
  }

  return numChanges
}

function areProjectVersionsIncreasing(v1Str, v2Str) {
  const v1 = parseProjectVersion(v1Str)
  const v2 = parseProjectVersion(v2Str)
  return v2.major > v1.major || (v2.major === v1.major && v2.minor > v1.minor)
}

function parseProjectVersion(version) {
  const [major, minor] = version.split('.').map(x => parseInt(x, 10))
  if (isNaN(major) || isNaN(minor)) {
    throw new Error(`Invalid project version: ${version}`)
  }
  return { major, minor }
}

async function replaceChunk(projectId, chunk) {
  const endVersion = chunk.getEndVersion()
  const oldChunkId = await chunkStore.getChunkIdForVersion(
    projectId,
    endVersion
  )
  console.log(`  Replacing chunk ${oldChunkId}`)
  // The chunks table has a unique constraint on doc_id and end_version. Because
  // we're replacing a chunk with the same end version, we need to destroy the
  // old chunk first.
  await chunkStore.destroy(projectId, oldChunkId)
  await chunkStore.create(projectId, chunk)
}
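
As the usage string above indicates, the script is a dry run unless --save is given. A hypothetical pair of invocations over illustrative project ids, first to inspect and then to persist the fixes:

    node services/history-v1/storage/tasks/fix_duplicate_versions.js 1234 5678
    node services/history-v1/storage/tasks/fix_duplicate_versions.js --save 1234 5678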

services/history-v1/storage/tasks/index.js (new file, 1 line)
@@ -0,0 +1 @@
exports.deleteOldChunks = require('./delete_old_chunks').deleteOldChunks
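
This re-export lets other code in the service call the deletion task directly instead of shelling out to the script. A minimal sketch of programmatic use, assuming a caller inside services/history-v1 and illustrative option values:

    const { deleteOldChunks } = require('./storage/tasks')

    async function cleanupHistoryChunks() {
      // Option names match the CLI mapping in delete_old_chunks.js; the values
      // here are assumptions for illustration only.
      await deleteOldChunks({
        batchSize: 1000,
        maxBatches: 10,
        minAgeSecs: 86400,
        timeout: 60000,
      })
    }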