import { expect } from 'chai'
import Crypto from 'node:crypto'
import Stream from 'node:stream'
import {
  makeBlobForFile,
  getStringLengthOfFile,
  makeProjectKey,
  BlobStore,
} from '../../../../storage/lib/blob_store/index.js'
import { Blob } from 'overleaf-editor-core'
import { insertBlob } from '../../../../storage/lib/blob_store/mongo.js'
import {
  backupBlob,
  downloadBlobToDir,
} from '../../../../storage/lib/backupBlob.mjs'
import fs from 'node:fs'
import path from 'node:path'
import os from 'node:os'
import fsExtra from 'fs-extra'
import { backedUpBlobs, projects } from '../../../../storage/lib/mongodb.js'
import { Binary, ObjectId } from 'mongodb'
import {
  backupPersistor,
  projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { WritableBuffer } from '@overleaf/stream-utils'
import cleanup from './support/cleanup.js'
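
// Test helpers for inspecting the backup bucket directly. Note that
// `_getClientForBucket` is a private accessor on the persistor; the tests
// reach through it to list raw S3 objects (including Size and StorageClass)
// instead of going via the persistor's public read API.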
async function listS3BucketRaw(bucket) {
  const client = backupPersistor._getClientForBucket(bucket)
  return await client.listObjectsV2({ Bucket: bucket }).promise()
}

async function listS3Bucket(bucket, wantStorageClass) {
  const response = await listS3BucketRaw(bucket)
  for (const object of response.Contents || []) {
    if (wantStorageClass) {
      expect(object).to.have.property('StorageClass', wantStorageClass)
    }
  }

  return (response.Contents || []).map(item => item.Key || '')
}

describe('backupBlob', function () {
  let filePath
  let tmpDir

  before(async function () {
    tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'temp-test-'))
    filePath = path.join(tmpDir, 'test.txt')
    await fs.promises.writeFile(filePath, 'test')
  })

  after(async function () {
    try {
      // await the removal so that failures are actually caught below
      await fsExtra.remove(tmpDir)
    } catch (err) {
      if (err.code !== 'ENOENT') {
        console.log('failed to delete temporary directory')
      }
    }
  })

  beforeEach(cleanup.everything)
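
  // As exercised below, the backedUpBlobs collection records, per project,
  // which blob hashes (stored as raw binary SHA-1 bytes) have already been
  // written to the backup bucket; backupBlob consults it to skip re-uploads.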
  describe('when the blob is already backed up', function () {
    let blob
    let historyId

    beforeEach(async function () {
      blob = await makeBlobForFile(filePath)
      historyId = 'abc123def456abc789def123'
      await backedUpBlobs.updateOne(
        {
          _id: new ObjectId(historyId),
        },
        {
          $set: { blobs: [new Binary(Buffer.from(blob.getHash(), 'hex'))] },
        },
        { upsert: true }
      )
      await backupBlob(historyId, blob, filePath)
    })

    it('does not upload the blob', async function () {
      const bucketContents = await listS3Bucket(projectBlobsBucket)
      expect(bucketContents).to.have.lengthOf(0)
    })
  })
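
  // A numeric (postgres) history id cannot key backedUpBlobs directly:
  // this case checks that the backup is recorded under the project's
  // Mongo _id, looked up via the projects collection.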
  describe('when the historyId is for a postgres project', function () {
    let blob
    let historyId
    const projectId = new ObjectId()

    beforeEach(async function () {
      blob = await makeBlobForFile(filePath)
      historyId = '123'
      await projects.insertOne({
        _id: projectId,
        overleaf: { history: { id: 123 } },
      })
      await backupBlob(historyId, blob, filePath)
    })

    afterEach(async function () {
      await projects.deleteOne({
        _id: projectId,
      })
    })

    it('uploads the blob to the backup', async function () {
      const bucketContents = await listS3Bucket(projectBlobsBucket)
      expect(bucketContents).to.have.lengthOf(1)
    })

    it('stores the backup', async function () {
      expect(
        await backedUpBlobs.findOne({
          _id: projectId,
          blobs: {
            $elemMatch: { $eq: new Binary(Buffer.from(blob.getHash(), 'hex')) },
          },
        })
      ).to.exist
    })
  })
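
  // The happy path: backing up a new blob should both upload the object
  // and record its hash in backedUpBlobs under the history id.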
  describe('when the blob is not already backed up', function () {
    let blob
    let historyId

    beforeEach(async function () {
      blob = await makeBlobForFile(filePath)
      historyId = 'abc123def456abc789def123'
      await backupBlob(historyId, blob, filePath)
    })

    it('uploads the blob to the backup', async function () {
      const bucketContents = await listS3Bucket(projectBlobsBucket)
      expect(bucketContents).to.have.lengthOf(1)
    })

    it('stores the backup', async function () {
      expect(
        await backedUpBlobs.findOne({
          _id: new ObjectId(historyId),
          blobs: {
            $elemMatch: { $eq: new Binary(Buffer.from(blob.getHash(), 'hex')) },
          },
        })
      ).to.exist
    })
  })
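
  // The expected stored sizes below pin down the compression behaviour:
  // compressible (text) content is stored gzipped, so storedSize is the
  // gzipped byte length, while binary content is stored verbatim.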
  const cases = [
    {
      name: 'text file',
      content: Buffer.from('x'.repeat(1000)),
      storedSize: 29, // zlib.gzipSync(content).byteLength
    },
    {
      name: 'large text file',
      // 'ü' is a 2-byte utf-8 character -> 4MB.
      content: Buffer.from('ü'.repeat(2 * 1024 * 1024)),
      storedSize: 4101, // zlib.gzipSync(content).byteLength
    },
    {
      name: 'binary file',
      content: Buffer.from([0, 1, 2, 3]),
      storedSize: 4,
    },
    {
      name: 'large binary file',
      content: Crypto.randomBytes(10 * 1024 * 1024),
      storedSize: 10 * 1024 * 1024,
    },
  ]
  for (const { name, content, storedSize } of cases) {
    describe(name, function () {
      let blob
      let key
      let historyId

      beforeEach(async function () {
        historyId = 'abc123def456abc789def123'
        await fs.promises.writeFile(filePath, content)
        blob = await makeBlobForFile(filePath)
        blob.setStringLength(
          await getStringLengthOfFile(blob.getByteLength(), filePath)
        )
        key = makeProjectKey(historyId, blob.getHash())
        await backupBlob(historyId, blob, filePath)
      })

      it('should upload the blob', async function () {
        const response = await listS3BucketRaw(projectBlobsBucket)
        expect(response.Contents).to.have.length(1)
        expect(response.Contents[0].Key).to.equal(key)
        expect(response.Contents[0].Size).to.equal(storedSize)
      })
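
      // getObjectStream with autoGunzip transparently decompresses objects
      // that were stored gzipped, so text and binary cases round-trip alike.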
      it('should read back the same content', async function () {
        const buf = new WritableBuffer()
        await Stream.promises.pipeline(
          await backupPersistor.getObjectStream(projectBlobsBucket, key, {
            autoGunzip: true,
          }),
          buf
        )
        expect(buf.getContents()).to.deep.equal(content)
      })
    })
  }
})

describe('downloadBlobToDir', function () {
  let tmpDirDownload
  const historyId = 'abc123def456abc789def123'

  before(async function () {
    tmpDirDownload = await fs.promises.mkdtemp(
      path.join(os.tmpdir(), 'downloadBlobTest-')
    )
  })

  after(async function () {
    await fs.promises.rm(tmpDirDownload, { recursive: true, force: true })
  })
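
  // downloadBlobToDir writes each blob to `<historyId>-<hash>` inside the
  // target directory; the failure cases below check that it does not leave
  // partial or preexisting files behind when the download fails.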
  it('should download the blob successfully', async function () {
    const data = 'hello world'
    // Use putString instead of writing a source file and using makeBlobForFile
    const blobStore = new BlobStore(historyId)
    const blob = await blobStore.putString(data)

    // Now call downloadBlobToDir which will use blobStore.getStream internally
    const downloadedFilePath = await downloadBlobToDir(
      historyId,
      blob,
      tmpDirDownload
    )
    const contents = await fs.promises.readFile(downloadedFilePath, 'utf8')
    expect(contents).to.equal(data)
  })
  it('should delete the file on error (if file already exists)', async function () {
    const data = 'data that will not be written'
    const blobStore = new BlobStore(historyId)
    const blob = await blobStore.putString(data)
    const hash = blob.getHash()
    const fileName = `${historyId}-${hash}`

    // Pre-create the destination file to trigger a failure due to an existing file
    const downloadedFilePath = path.join(tmpDirDownload, fileName)
    await fs.promises.writeFile(downloadedFilePath, 'preexisting content')

    try {
      await downloadBlobToDir(historyId, blob, tmpDirDownload)
      expect.fail('should not reach here')
    } catch (error) {
      // Check that the file was deleted
      await expect(fs.promises.access(downloadedFilePath)).to.be.rejected
    }
  })
  it('should not leave an empty file if download fails', async function () {
    // Create a blob with a hash that does not exist in the blob store
    const hash = '0000000000000000000000000000000000000000'
    const blob = new Blob(hash, 12, 12)
    await insertBlob(historyId, blob)
    const fileName = `${historyId}-${hash}`
    try {
      await downloadBlobToDir(historyId, blob, tmpDirDownload)
      expect.fail('should not reach here')
    } catch (error) {
      expect(error).to.be.instanceOf(Blob.NotFoundError)
      const downloadedFilePath = path.join(tmpDirDownload, fileName)
      // Check that the file was deleted
      await expect(fs.promises.access(downloadedFilePath)).to.be.rejected
    }
  })
})