import { expect } from 'chai'
import Crypto from 'node:crypto'
import Stream from 'node:stream'
import {
  makeBlobForFile,
  getStringLengthOfFile,
  makeProjectKey,
  BlobStore,
} from '../../../../storage/lib/blob_store/index.js'
import { Blob } from 'overleaf-editor-core'
import { insertBlob } from '../../../../storage/lib/blob_store/mongo.js'
import {
  backupBlob,
  downloadBlobToDir,
} from '../../../../storage/lib/backupBlob.mjs'
import fs from 'node:fs'
import path from 'node:path'
import os from 'node:os'
import fsExtra from 'fs-extra'
import { backedUpBlobs, projects } from '../../../../storage/lib/mongodb.js'
import { Binary, ObjectId } from 'mongodb'
import {
  backupPersistor,
  projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { WritableBuffer } from '@overleaf/stream-utils'
import cleanup from './support/cleanup.js'
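
// List the objects in an S3 bucket using the backup persistor's underlying
// AWS SDK client.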
async function listS3BucketRaw(bucket) {
  const client = backupPersistor._getClientForBucket(bucket)
  return await client.listObjectsV2({ Bucket: bucket }).promise()
}
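
// Return the keys of every object in the bucket; when wantStorageClass is
// given, also assert that each object uses that storage class.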
async function listS3Bucket(bucket, wantStorageClass) {
  const response = await listS3BucketRaw(bucket)
  for (const object of response.Contents || []) {
    if (wantStorageClass) {
      expect(object).to.have.property('StorageClass', wantStorageClass)
    }
  }
  return (response.Contents || []).map(item => item.Key || '')
}
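
// backupBlob() should copy the blob to the backup bucket and record its hash
// in backedUpBlobs, skip blobs that are already recorded, and resolve numeric
// (postgres) history ids to the corresponding project ObjectId.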
describe('backupBlob', function () {
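  // Shared fixture: a small temporary file used as the blob source in these tests.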
  let filePath
  let tmpDir

  before(async function () {
    tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'temp-test-'))
    filePath = path.join(tmpDir, 'test.txt')
    await fs.promises.writeFile(filePath, 'test')
  })

  after(async function () {
    try {
      await fsExtra.remove(tmpDir)
    } catch (err) {
      if (err.code !== 'ENOENT') {
        console.log('failed to delete temporary directory')
      }
    }
  })

  beforeEach(cleanup.everything)

  describe('when the blob is already backed up', function () {
    let blob
    let historyId

    beforeEach(async function () {
      blob = await makeBlobForFile(filePath)
      historyId = 'abc123def456abc789def123'
      await backedUpBlobs.updateOne(
        {
          _id: new ObjectId(historyId),
        },
        {
          $set: { blobs: [new Binary(Buffer.from(blob.getHash(), 'hex'))] },
        },
        { upsert: true }
      )
      await backupBlob(historyId, blob, filePath)
    })

    it('does not upload the blob', async function () {
      const bucketContents = await listS3Bucket(projectBlobsBucket)
      expect(bucketContents).to.have.lengthOf(0)
    })
  })

  describe('when the historyId is for a postgres project', function () {
    let blob
    let historyId
    const projectId = new ObjectId()

    beforeEach(async function () {
      blob = await makeBlobForFile(filePath)
      historyId = '123'
      await projects.insertOne({
        _id: projectId,
        overleaf: { history: { id: 123 } },
      })
      await backupBlob(historyId, blob, filePath)
    })

    afterEach(async function () {
      await projects.deleteOne({
        _id: projectId,
      })
    })

    it('uploads the blob to the backup', async function () {
      const bucketContents = await listS3Bucket(projectBlobsBucket)
      expect(bucketContents).to.have.lengthOf(1)
    })

    it('stores the backup', async function () {
      expect(
        await backedUpBlobs.findOne({
          _id: projectId,
          blobs: {
            $elemMatch: { $eq: new Binary(Buffer.from(blob.getHash(), 'hex')) },
          },
        })
      ).to.exist
    })
  })

  describe('when the blob is not already backed up', function () {
    let blob
    let historyId

    beforeEach(async function () {
      blob = await makeBlobForFile(filePath)
      historyId = 'abc123def456abc789def123'
      await backupBlob(historyId, blob, filePath)
    })

    it('uploads the blob to the backup', async function () {
      const bucketContents = await listS3Bucket(projectBlobsBucket)
      expect(bucketContents).to.have.lengthOf(1)
    })

    it('stores the backup', async function () {
      expect(
        await backedUpBlobs.findOne({
          _id: new ObjectId(historyId),
          blobs: {
            $elemMatch: { $eq: new Binary(Buffer.from(blob.getHash(), 'hex')) },
          },
        })
      ).to.exist
    })
  })
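
  // Round-trip cases: text content is compressed with gzip before upload,
  // while binary content is stored at its original size.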
  const cases = [
    {
      name: 'text file',
      content: Buffer.from('x'.repeat(1000)),
      storedSize: 29, // zlib.gzipSync(content).byteLength
    },
    {
      name: 'large text file',
      // 'ü' is a 2-byte utf-8 character -> 4MB.
      content: Buffer.from('ü'.repeat(2 * 1024 * 1024)),
      storedSize: 4101, // zlib.gzipSync(content).byteLength
    },
    {
      name: 'binary file',
      content: Buffer.from([0, 1, 2, 3]),
      storedSize: 4,
    },
    {
      name: 'large binary file',
      content: Crypto.randomBytes(10 * 1024 * 1024),
      storedSize: 10 * 1024 * 1024,
    },
  ]

  for (const { name, content, storedSize } of cases) {
    describe(name, function () {
      let blob
      let key
      let historyId

      beforeEach(async function () {
        historyId = 'abc123def456abc789def123'
        await fs.promises.writeFile(filePath, content)
        blob = await makeBlobForFile(filePath)
        blob.setStringLength(
          await getStringLengthOfFile(blob.getByteLength(), filePath)
        )
        key = makeProjectKey(historyId, blob.getHash())
        await backupBlob(historyId, blob, filePath)
      })

      it('should upload the blob', async function () {
        const response = await listS3BucketRaw(projectBlobsBucket)
        expect(response.Contents).to.have.length(1)
        expect(response.Contents[0].Key).to.equal(key)
        expect(response.Contents[0].Size).to.equal(storedSize)
      })

      it('should read back the same content', async function () {
        const buf = new WritableBuffer()
        await Stream.promises.pipeline(
          await backupPersistor.getObjectStream(projectBlobsBucket, key, {
            autoGunzip: true,
          }),
          buf
        )
        expect(buf.getContents()).to.deep.equal(content)
      })
    })
  }
})
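
// downloadBlobToDir() should write the blob to `<historyId>-<hash>` inside the
// target directory and remove any partially written file when the download fails.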
describe('downloadBlobToDir', function () {
  let tmpDirDownload
  const historyId = 'abc123def456abc789def123'

  before(async function () {
    tmpDirDownload = await fs.promises.mkdtemp(
      path.join(os.tmpdir(), 'downloadBlobTest-')
    )
  })

  after(async function () {
    await fs.promises.rm(tmpDirDownload, { recursive: true, force: true })
  })

  it('should download the blob successfully', async function () {
    const data = 'hello world'
    // Use putString instead of writing a source file and using makeBlobForFile
    const blobStore = new BlobStore(historyId)
    const blob = await blobStore.putString(data)
    // Now call downloadBlobToDir which will use blobStore.getStream internally
    const downloadedFilePath = await downloadBlobToDir(
      historyId,
      blob,
      tmpDirDownload
    )
    const contents = await fs.promises.readFile(downloadedFilePath, 'utf8')
    expect(contents).to.equal(data)
  })

  it('should delete the file on error (if file already exists)', async function () {
    const data = 'data that will not be written'
    const blobStore = new BlobStore(historyId)
    const blob = await blobStore.putString(data)
    const hash = blob.getHash()
    const fileName = `${historyId}-${hash}`
    // Pre-create the destination file to trigger a failure due to an existing file
    const downloadedFilePath = path.join(tmpDirDownload, fileName)
    await fs.promises.writeFile(downloadedFilePath, 'preexisting content')
    try {
      await downloadBlobToDir(historyId, blob, tmpDirDownload)
      expect.fail('should not reach here')
    } catch (error) {
      // Check that the file was deleted
      await expect(fs.promises.access(downloadedFilePath)).to.be.rejected
    }
  })

  it('should not leave an empty file if download fails', async function () {
    // Create a blob with a hash that does not exist in the blob store
    const hash = '0000000000000000000000000000000000000000'
    const blob = new Blob(hash, 12, 12)
    await insertBlob(historyId, blob)
    const fileName = `${historyId}-${hash}`
    try {
      await downloadBlobToDir(historyId, blob, tmpDirDownload)
      expect.fail('should not reach here')
    } catch (error) {
      expect(error).to.be.instanceOf(Blob.NotFoundError)
      const downloadedFilePath = path.join(tmpDirDownload, fileName)
      // Check that the file was deleted
      await expect(fs.promises.access(downloadedFilePath)).to.be.rejected
    }
  })
})