first commit

This commit is contained in:
2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
// Global test bootstrap: wires chai plugins and configures sandboxed-module
// so every module loaded under test gets stubbed logging/metrics.
const SandboxedModule = require('sandboxed-module')
const chai = require('chai')
const sinon = require('sinon')

// Enable sinon assertions (`calledWith`, ...) and promise assertions
// (`rejectedWith`, ...) for all test files.
chai.use(require('sinon-chai'))
chai.use(require('chai-as-promised'))

SandboxedModule.configure({
  requires: {
    // Silence the logger so test output stays clean.
    '@overleaf/logger': {
      debug() {},
      log() {},
      info() {},
      warn() {},
      error() {},
      err() {},
    },
    // Stub metrics so tests run without a metrics backend.
    '@overleaf/metrics': {
      inc: sinon.stub(),
      count: sinon.stub(),
      histogram: sinon.stub(),
      Timer: class Timer {
        done() {}
      },
    },
  },
  globals: { Buffer, Math, console, process, URL },
  sourceTransformers: {
    // Strip the `node:` scheme from requires so stubs keyed without the
    // prefix (e.g. `fs`) still match.
    // FIX: use a capture group so the original quote character is kept.
    // The previous replacement hard-coded a single quote, so
    // `require("node:fs")` became `require('fs")` — mismatched quotes.
    removeNodePrefix: function (source) {
      return source.replace(/require\((['"])node:/g, 'require($1')
    },
  },
})

View File

@@ -0,0 +1,417 @@
const crypto = require('node:crypto')
const { expect } = require('chai')
const mockFs = require('mock-fs')
const fs = require('node:fs')
const fsPromises = require('node:fs/promises')
const Path = require('node:path')
const StreamPromises = require('node:stream/promises')
const SandboxedModule = require('sandboxed-module')
const Errors = require('../../src/Errors')
const MODULE_PATH = '../../src/FSPersistor.js'
// Unit tests for FSPersistor: a persistor backed by the local filesystem
// (mocked with mock-fs), exercised under two key-to-path mapping modes.
describe('FSPersistorTests', function () {
  // Pre-existing files in the mocked filesystem, used as upload sources.
  const localFiles = {
    '/uploads/info.txt': Buffer.from('This information is critical', {
      encoding: 'utf-8',
    }),
    '/uploads/other.txt': Buffer.from('Some other content', {
      encoding: 'utf-8',
    }),
  }
  const location = '/bucket'
  // Object keys used throughout; two share the "animals/" prefix so the
  // directory-level operations have something to group on.
  const files = {
    wombat: 'animals/wombat.tex',
    giraffe: 'animals/giraffe.tex',
    potato: 'vegetables/potato.tex',
  }
  // Each scenario reruns the whole suite with a different path mapping:
  // flat mode replaces "/" with "_"; subdirectory mode keeps the hierarchy.
  const scenarios = [
    {
      description: 'default settings',
      settings: {},
      fsPath: key => Path.join(location, key.replaceAll('/', '_')),
    },
    {
      description: 'with useSubdirectories = true',
      settings: { useSubdirectories: true },
      fsPath: key => Path.join(location, key),
    },
  ]
  for (const scenario of scenarios) {
    describe(scenario.description, function () {
      let persistor
      beforeEach(function () {
        // Load FSPersistor in a sandbox, wiring in the real fs/stream
        // promise APIs so it operates on the mock-fs filesystem below.
        const FSPersistor = SandboxedModule.require(MODULE_PATH, {
          requires: {
            'fs/promises': fsPromises,
            'stream/promises': StreamPromises,
            './Errors': Errors,
          },
        })
        persistor = new FSPersistor(scenario.settings)
      })
      beforeEach(function () {
        // Fresh in-memory filesystem per test. "/not-a-dir" is a regular
        // file so writes "into" it fail, exercising the error paths.
        mockFs({
          ...localFiles,
          '/not-a-dir':
            'This regular file is meant to prevent using this path as a directory',
          '/directory/subdirectory': {},
        })
      })
      afterEach(function () {
        mockFs.restore()
      })
// sendFile: copying a local file into the persistor.
describe('sendFile', function () {
  it('should copy the file', async function () {
    await persistor.sendFile(location, files.wombat, '/uploads/info.txt')
    const written = await fsPromises.readFile(scenario.fsPath(files.wombat))
    expect(written.equals(localFiles['/uploads/info.txt'])).to.be.true
  })
  it('should return an error if the file cannot be stored', async function () {
    // "/not-a-dir" is a regular file, so storing under it must fail.
    const attempt = persistor.sendFile(
      '/not-a-dir',
      files.wombat,
      '/uploads/info.txt'
    )
    await expect(attempt).to.be.rejectedWith(Errors.WriteError)
  })
})
// sendStream: writing an incoming stream to disk via a temporary file,
// with optional md5 verification, for both new and existing target files.
describe('sendStream', function () {
  let stream
  describe("when the file doesn't exist", function () {
    beforeEach(function () {
      // Fresh source stream per test — read streams are single-use.
      stream = fs.createReadStream('/uploads/info.txt')
    })
    it('should write the stream to disk', async function () {
      await persistor.sendStream(location, files.wombat, stream)
      const contents = await fsPromises.readFile(
        scenario.fsPath(files.wombat)
      )
      expect(contents.equals(localFiles['/uploads/info.txt'])).to.be.true
    })
    it('should delete the temporary file', async function () {
      await persistor.sendStream(location, files.wombat, stream)
      const entries = await fsPromises.readdir(location)
      const tempDirs = entries.filter(dir => dir.startsWith('tmp-'))
      expect(tempDirs).to.be.empty
    })
    describe('on error', function () {
      beforeEach(async function () {
        await expect(
          persistor.sendStream('/not-a-dir', files.wombat, stream)
        ).to.be.rejectedWith(Errors.WriteError)
      })
      it('should not write the target file', async function () {
        await expect(fsPromises.access(scenario.fsPath(files.wombat))).to
          .be.rejected
      })
      it('should delete the temporary file', async function () {
        // NOTE(review): this re-sends the same stream after the failed
        // attempt in beforeEach; if that attempt consumed the stream this
        // may write an empty object rather than real content. Consider
        // asserting only on directory contents — TODO confirm intent.
        await persistor.sendStream(location, files.wombat, stream)
        const entries = await fsPromises.readdir(location)
        const tempDirs = entries.filter(dir => dir.startsWith('tmp-'))
        expect(tempDirs).to.be.empty
      })
    })
    describe('when the md5 hash matches', function () {
      it('should write the stream to disk', async function () {
        await persistor.sendStream(location, files.wombat, stream, {
          sourceMd5: md5(localFiles['/uploads/info.txt']),
        })
        const contents = await fsPromises.readFile(
          scenario.fsPath(files.wombat)
        )
        expect(contents.equals(localFiles['/uploads/info.txt'])).to.be
          .true
      })
    })
    describe('when the md5 hash does not match', function () {
      beforeEach(async function () {
        await expect(
          persistor.sendStream(location, files.wombat, stream, {
            sourceMd5: md5('wrong content'),
          })
        ).to.be.rejectedWith(Errors.WriteError)
      })
      it('should not write the target file', async function () {
        await expect(fsPromises.access(scenario.fsPath(files.wombat))).to
          .be.rejected
      })
      it('should delete the temporary file', async function () {
        // NOTE(review): re-sends a stream already drained by the failed
        // md5-mismatch attempt above — TODO confirm this is intentional.
        await persistor.sendStream(location, files.wombat, stream)
        const entries = await fsPromises.readdir(location)
        const tempDirs = entries.filter(dir => dir.startsWith('tmp-'))
        expect(tempDirs).to.be.empty
      })
    })
  })
  describe('when the file already exists', function () {
    let stream
    beforeEach(async function () {
      // Seed the target key first, then stream different content over it.
      await persistor.sendFile(
        location,
        files.wombat,
        '/uploads/info.txt'
      )
      stream = fs.createReadStream('/uploads/other.txt')
    })
    it('should write the stream to disk', async function () {
      await persistor.sendStream(location, files.wombat, stream)
      const contents = await fsPromises.readFile(
        scenario.fsPath(files.wombat)
      )
      expect(contents.equals(localFiles['/uploads/other.txt'])).to.be.true
    })
    it('should delete the temporary file', async function () {
      await persistor.sendStream(location, files.wombat, stream)
      const entries = await fsPromises.readdir(location)
      const tempDirs = entries.filter(dir => dir.startsWith('tmp-'))
      expect(tempDirs).to.be.empty
    })
    describe('on error', function () {
      beforeEach(async function () {
        await expect(
          persistor.sendStream('/not-a-dir', files.wombat, stream)
        ).to.be.rejectedWith(Errors.WriteError)
      })
      it('should not update the target file', async function () {
        // The original content seeded by sendFile must be untouched.
        const contents = await fsPromises.readFile(
          scenario.fsPath(files.wombat)
        )
        expect(contents.equals(localFiles['/uploads/info.txt'])).to.be
          .true
      })
      it('should delete the temporary file', async function () {
        // NOTE(review): see the note above about re-sending a used stream.
        await persistor.sendStream(location, files.wombat, stream)
        const entries = await fsPromises.readdir(location)
        const tempDirs = entries.filter(dir => dir.startsWith('tmp-'))
        expect(tempDirs).to.be.empty
      })
    })
    describe('when the md5 hash matches', function () {
      it('should write the stream to disk', async function () {
        await persistor.sendStream(location, files.wombat, stream, {
          sourceMd5: md5(localFiles['/uploads/other.txt']),
        })
        const contents = await fsPromises.readFile(
          scenario.fsPath(files.wombat)
        )
        expect(contents.equals(localFiles['/uploads/other.txt'])).to.be
          .true
      })
    })
    describe('when the md5 hash does not match', function () {
      beforeEach(async function () {
        await expect(
          persistor.sendStream(location, files.wombat, stream, {
            sourceMd5: md5('wrong content'),
          })
        ).to.be.rejectedWith(Errors.WriteError)
      })
      it('should not update the target file', async function () {
        const contents = await fsPromises.readFile(
          scenario.fsPath(files.wombat)
        )
        expect(contents.equals(localFiles['/uploads/info.txt'])).to.be
          .true
      })
      it('should delete the temporary file', async function () {
        // NOTE(review): see the note above about re-sending a used stream.
        await persistor.sendStream(location, files.wombat, stream)
        const entries = await fsPromises.readdir(location)
        const tempDirs = entries.filter(dir => dir.startsWith('tmp-'))
        expect(tempDirs).to.be.empty
      })
    })
  })
})
// getObjectStream: reading objects back, with optional byte ranges.
describe('getObjectStream', function () {
  beforeEach(async function () {
    await persistor.sendFile(location, files.wombat, '/uploads/info.txt')
  })
  it('should return a string with the object contents', async function () {
    const stream = await persistor.getObjectStream(location, files.wombat)
    const contents = await streamToBuffer(stream)
    expect(contents.equals(localFiles['/uploads/info.txt'])).to.be.true
  })
  it('should support ranges', async function () {
    const stream = await persistor.getObjectStream(
      location,
      files.wombat,
      {
        start: 5,
        end: 16,
      }
    )
    const contents = await streamToBuffer(stream)
    // end is inclusive in ranges, but exclusive in slice()
    expect(contents.equals(localFiles['/uploads/info.txt'].slice(5, 17)))
      .to.be.true
  })
  it('should give a NotFoundError if the file does not exist', async function () {
    await expect(
      persistor.getObjectStream(location, 'does-not-exist')
    ).to.be.rejectedWith(Errors.NotFoundError)
  })
})
// getObjectSize: size lookups for existing and missing objects.
describe('getObjectSize', function () {
  beforeEach(async function () {
    await persistor.sendFile(location, files.wombat, '/uploads/info.txt')
  })
  it('should return the file size', async function () {
    const size = await persistor.getObjectSize(location, files.wombat)
    expect(size).to.equal(localFiles['/uploads/info.txt'].length)
  })
  it('should throw a NotFoundError if the file does not exist', async function () {
    const lookup = persistor.getObjectSize(location, 'does-not-exist')
    await expect(lookup).to.be.rejectedWith(Errors.NotFoundError)
  })
})
// copyObject: duplicating an object under a new key.
describe('copyObject', function () {
  beforeEach(async function () {
    await persistor.sendFile(location, files.wombat, '/uploads/info.txt')
  })
  it('Should copy the file to the new location', async function () {
    await persistor.copyObject(location, files.wombat, files.potato)
    const copied = await fsPromises.readFile(scenario.fsPath(files.potato))
    expect(copied.equals(localFiles['/uploads/info.txt'])).to.be.true
  })
})
// deleteObject: deleting objects should be idempotent.
describe('deleteObject', function () {
  beforeEach(async function () {
    await persistor.sendFile(location, files.wombat, '/uploads/info.txt')
    // Sanity check: the object must be on disk before we delete it.
    await fsPromises.access(scenario.fsPath(files.wombat))
  })
  it('should delete the file', async function () {
    await persistor.deleteObject(location, files.wombat)
    const access = fsPromises.access(scenario.fsPath(files.wombat))
    await expect(access).to.be.rejected
  })
  it("should ignore files that don't exist", async function () {
    await persistor.deleteObject(location, 'does-not-exist')
  })
})
// deleteDirectory: removing every object under a key prefix.
describe('deleteDirectory', function () {
  beforeEach(async function () {
    // Store all three fixtures and confirm each landed on disk.
    for (const file of Object.values(files)) {
      await persistor.sendFile(location, file, '/uploads/info.txt')
      await fsPromises.access(scenario.fsPath(file))
    }
  })
  it('should delete all files under the directory', async function () {
    await persistor.deleteDirectory(location, 'animals')
    for (const file of [files.wombat, files.giraffe]) {
      await expect(fsPromises.access(scenario.fsPath(file))).to.be
        .rejected
    }
  })
  // FIX: corrected test-description typo "directoris" -> "directories".
  it('should not delete files under other directories', async function () {
    await persistor.deleteDirectory(location, 'animals')
    await fsPromises.access(scenario.fsPath(files.potato))
  })
  it("should ignore directories that don't exist", async function () {
    await persistor.deleteDirectory(location, 'does-not-exist')
    for (const file of Object.values(files)) {
      await fsPromises.access(scenario.fsPath(file))
    }
  })
})
// checkIfObjectExists: boolean existence checks.
describe('checkIfObjectExists', function () {
  beforeEach(async function () {
    await persistor.sendFile(location, files.wombat, '/uploads/info.txt')
  })
  it('should return true for existing files', async function () {
    const exists = await persistor.checkIfObjectExists(location, files.wombat)
    expect(exists).to.equal(true)
  })
  it('should return false for non-existing files', async function () {
    const exists = await persistor.checkIfObjectExists(
      location,
      'does-not-exist'
    )
    expect(exists).to.equal(false)
  })
})
// directorySize: sums the sizes of all objects sharing a prefix.
describe('directorySize', function () {
  beforeEach(async function () {
    for (const file of Object.values(files)) {
      await persistor.sendFile(location, file, '/uploads/info.txt')
    }
  })
  it('should sum directory files size', async function () {
    // Two of the three fixtures live under "animals/".
    const total = await persistor.directorySize(location, 'animals')
    expect(total).to.equal(2 * localFiles['/uploads/info.txt'].length)
  })
  it('should return 0 on non-existing directories', async function () {
    const total = await persistor.directorySize(location, 'does-not-exist')
    expect(total).to.equal(0)
  })
})
    }) // end describe(scenario.description)
  } // end for (const scenario of scenarios)
}) // end describe('FSPersistorTests')
// Test helper: hex-encoded MD5 digest of the given string or buffer.
function md5(str) {
  const hash = crypto.createHash('md5')
  hash.update(str)
  return hash.digest('hex')
}
// Test helper: drains a readable (or any async-iterable of Buffer chunks)
// into a single Buffer.
async function streamToBuffer(stream) {
  const collected = []
  for await (const piece of stream) {
    collected.push(piece)
  }
  return Buffer.concat(collected)
}

View File

@@ -0,0 +1,726 @@
const { EventEmitter } = require('node:events')
const sinon = require('sinon')
const chai = require('chai')
const { expect } = chai
const modulePath = '../../src/GcsPersistor.js'
const SandboxedModule = require('sandboxed-module')
const { ObjectId } = require('mongodb')
const asyncPool = require('tiny-async-pool')
const Errors = require('../../src/Errors')
// Unit tests for GcsPersistor: exercises the Google Cloud Storage backed
// persistor against fully stubbed @google-cloud/storage classes.
describe('GcsPersistorTests', function () {
  const filename = '/wombat/potato.tex'
  const bucket = 'womBucket'
  const key = 'monKey'
  const destKey = 'donKey'
  const genericError = new Error('guru meditation error')
  // Combined size of the two stub files below (11 + 22 bytes).
  const filesSize = 33
  const md5 = 'ffffffff00000000ffffffff00000000'
  const WriteStream = 'writeStream'
  const redirectUrl = 'https://wombat.potato/giraffe'
  let Logger,
    Transform,
    PassThrough,
    Storage,
    Fs,
    GcsNotFoundError,
    ReadStream,
    Stream,
    StreamPromises,
    GcsBucket,
    GcsFile,
    GcsPersistor,
    FileNotFoundError,
    Hash,
    Settings,
    crypto,
    files
  beforeEach(function () {
    Settings = {
      directoryKeyRegex: /^[0-9a-fA-F]{24}\/[0-9a-fA-F]{24}/,
    }
    // md5Hash appears to be the base64 form of the hex md5 constant above
    // (GCS reports hashes base64-encoded) — see the sendStream tests.
    files = [
      {
        metadata: { size: '11', md5Hash: '/////wAAAAD/////AAAAAA==' },
        delete: sinon.stub(),
      },
      {
        metadata: { size: '22', md5Hash: '/////wAAAAD/////AAAAAA==' },
        delete: sinon.stub(),
      },
    ]
    // Minimal fake of the stream returned by GCS createReadStream: on
    // read() it emits either the configured error or a response event
    // carrying the configured status code.
    class FakeGCSResponse extends EventEmitter {
      constructor() {
        super()
        this.statusCode = 200
        this.err = null
      }
      read() {
        if (this.err) return this.emit('error', this.err)
        this.emit('response', { statusCode: this.statusCode, headers: {} })
      }
    }
    ReadStream = new FakeGCSResponse()
    PassThrough = class {}
    Transform = class {
      once() {}
    }
    Stream = {
      PassThrough,
      Transform,
    }
    StreamPromises = {
      pipeline: sinon.stub().resolves(),
    }
    // Stubbed GCS File object; individual tests replace methods to
    // simulate not-found and generic failures.
    GcsFile = {
      delete: sinon.stub().resolves(),
      createReadStream: sinon.stub().returns(ReadStream),
      getMetadata: sinon.stub().resolves([files[0].metadata]),
      createWriteStream: sinon.stub().returns(WriteStream),
      copy: sinon.stub().resolves(),
      exists: sinon.stub().resolves([true]),
      getSignedUrl: sinon.stub().resolves([redirectUrl]),
    }
    GcsBucket = {
      file: sinon.stub().returns(GcsFile),
      getFiles: sinon.stub().resolves([files]),
    }
    Storage = class {
      constructor() {
        this.interceptors = []
      }
    }
    Storage.prototype.bucket = sinon.stub().returns(GcsBucket)
    // GCS signals missing objects with a numeric 404 `code` on the error.
    GcsNotFoundError = new Error('File not found')
    GcsNotFoundError.code = 404
    Fs = {
      createReadStream: sinon.stub().returns(ReadStream),
    }
    FileNotFoundError = new Error('File not found')
    FileNotFoundError.code = 'ENOENT'
    Hash = {
      end: sinon.stub(),
      read: sinon.stub().returns(md5),
      digest: sinon.stub().returns(md5),
      setEncoding: sinon.stub(),
    }
    crypto = {
      createHash: sinon.stub().returns(Hash),
    }
    Logger = {
      warn: sinon.stub(),
    }
    // Instantiate the persistor in a sandbox with every dependency stubbed.
    GcsPersistor = new (SandboxedModule.require(modulePath, {
      requires: {
        '@google-cloud/storage': { Storage },
        '@overleaf/logger': Logger,
        'tiny-async-pool': asyncPool,
        './Errors': Errors,
        fs: Fs,
        stream: Stream,
        'stream/promises': StreamPromises,
        crypto,
      },
      globals: { console, Buffer },
    }))(Settings)
  })
// getObjectStream: reading an object back as a metered PassThrough stream.
describe('getObjectStream', function () {
  describe('when called with valid parameters', function () {
    let stream
    beforeEach(async function () {
      stream = await GcsPersistor.getObjectStream(bucket, key)
    })
    it('returns a PassThrough stream', function () {
      expect(stream).to.be.instanceOf(PassThrough)
    })
    it('fetches the right key from the right bucket', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.file).to.have.been.calledWith(key)
      expect(GcsFile.createReadStream).to.have.been.called
    })
    it('disables automatic decompression', function () {
      expect(GcsFile.createReadStream).to.have.been.calledWith({
        decompress: false,
      })
    })
    it('pipes the stream through the meter', function () {
      // pipeline(GCS source, meter Transform, returned PassThrough)
      expect(StreamPromises.pipeline).to.have.been.calledWith(
        ReadStream,
        sinon.match.instanceOf(Transform),
        sinon.match.instanceOf(PassThrough)
      )
    })
  })
  describe('when called with a byte range', function () {
    let stream
    beforeEach(async function () {
      stream = await GcsPersistor.getObjectStream(bucket, key, {
        start: 5,
        end: 10,
      })
    })
    it('returns a PassThrough stream', function () {
      expect(stream).to.be.instanceOf(PassThrough)
    })
    it('passes the byte range on to GCS', function () {
      expect(GcsFile.createReadStream).to.have.been.calledWith({
        decompress: false,
        start: 5,
        end: 10,
      })
    })
  })
  describe("when the file doesn't exist", function () {
    let error, stream
    beforeEach(async function () {
      // A 404 response from the fake GCS stream marks the object missing.
      ReadStream.statusCode = 404
      try {
        stream = await GcsPersistor.getObjectStream(bucket, key)
      } catch (e) {
        error = e
      }
    })
    it('does not return a stream', function () {
      expect(stream).not.to.exist
    })
    it('throws a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })
    it('wraps the error', function () {
      expect(error.cause).to.exist
    })
    it('stores the bucket and key in the error', function () {
      expect(error.info).to.include({ bucketName: bucket, key })
    })
  })
  describe('when Gcs encounters an unknown error', function () {
    let error, stream
    beforeEach(async function () {
      // An emitted stream error should surface as a ReadError.
      ReadStream.err = genericError
      try {
        stream = await GcsPersistor.getObjectStream(bucket, key)
      } catch (err) {
        error = err
      }
    })
    it('does not return a stream', function () {
      expect(stream).not.to.exist
    })
    it('throws a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })
    it('wraps the error', function () {
      expect(error.cause).to.exist
    })
    it('stores the bucket and key in the error', function () {
      expect(error.info).to.include({ bucketName: bucket, key })
    })
  })
})
// getRedirectUrl: signed URLs by default, plain media URLs when
// unsignedUrls is enabled with a custom endpoint.
describe('getRedirectUrl', function () {
  let signedUrl
  describe('with signed URLs', function () {
    beforeEach(async function () {
      signedUrl = await GcsPersistor.getRedirectUrl(bucket, key)
    })
    it('should request a signed URL', function () {
      expect(GcsFile.getSignedUrl).to.have.been.called
    })
    it('should return the url', function () {
      expect(signedUrl).to.equal(redirectUrl)
    })
  })
  describe('with unsigned URLs', function () {
    beforeEach(async function () {
      const settings = GcsPersistor.settings
      settings.unsignedUrls = true
      settings.endpoint = {
        apiEndpoint: 'http://custom.endpoint',
      }
      signedUrl = await GcsPersistor.getRedirectUrl(bucket, key)
    })
    it('should return a plain URL', function () {
      const expected = `http://custom.endpoint/download/storage/v1/b/${bucket}/o/${key}?alt=media`
      expect(signedUrl).to.equal(expected)
    })
  })
})
// getObjectSize: metadata-based size lookup plus error mapping.
describe('getObjectSize', function () {
  describe('when called with valid parameters', function () {
    let size
    beforeEach(async function () {
      size = await GcsPersistor.getObjectSize(bucket, key)
    })
    it('should return the object size', function () {
      // Stubbed metadata reports size '11'; expected back as a number.
      expect(size).to.equal(11)
    })
    it('should pass the bucket and key to GCS', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.file).to.have.been.calledWith(key)
      expect(GcsFile.getMetadata).to.have.been.called
    })
  })
  describe('when the object is not found', function () {
    let error
    beforeEach(async function () {
      // 404-coded rejection should map to NotFoundError.
      GcsFile.getMetadata = sinon.stub().rejects(GcsNotFoundError)
      try {
        await GcsPersistor.getObjectSize(bucket, key)
      } catch (err) {
        error = err
      }
    })
    it('should return a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })
    it('should wrap the error', function () {
      expect(error.cause).to.equal(GcsNotFoundError)
    })
  })
  describe('when GCS returns an error', function () {
    let error
    beforeEach(async function () {
      GcsFile.getMetadata = sinon.stub().rejects(genericError)
      try {
        await GcsPersistor.getObjectSize(bucket, key)
      } catch (err) {
        error = err
      }
    })
    it('should return a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })
    it('should wrap the error', function () {
      expect(error.cause).to.equal(genericError)
    })
  })
})
// sendStream: uploading a stream, with optional md5 validation and metadata.
describe('sendStream', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      return GcsPersistor.sendStream(bucket, key, ReadStream)
    })
    it('should upload the stream', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.file).to.have.been.calledWith(key)
      expect(GcsFile.createWriteStream).to.have.been.called
    })
    it('should not try to create a resumable upload', function () {
      expect(GcsFile.createWriteStream).to.have.been.calledWith({
        resumable: false,
      })
    })
    it('should meter the stream and pass it to GCS', function () {
      expect(StreamPromises.pipeline).to.have.been.calledWith(
        ReadStream,
        sinon.match.instanceOf(Transform),
        WriteStream
      )
    })
    it('calculates the md5 hash of the file', function () {
      expect(Hash.digest).to.have.been.called
    })
  })
  describe('when a hash is supplied', function () {
    beforeEach(async function () {
      return GcsPersistor.sendStream(bucket, key, ReadStream, {
        sourceMd5: 'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb',
      })
    })
    it('should not calculate the md5 hash of the file', function () {
      expect(Hash.digest).not.to.have.been.called
    })
    it('sends the hash in base64', function () {
      // 'qqqqqru7u7uqqqqqu7u7uw==' is the base64 encoding of the hex
      // sourceMd5 supplied above.
      expect(GcsFile.createWriteStream).to.have.been.calledWith({
        validation: 'md5',
        metadata: {
          md5Hash: 'qqqqqru7u7uqqqqqu7u7uw==',
        },
        resumable: false,
      })
    })
    it('does not fetch the md5 hash of the uploaded file', function () {
      expect(GcsFile.getMetadata).not.to.have.been.called
    })
  })
  describe('when metadata is supplied', function () {
    const contentType = 'text/csv'
    const contentEncoding = 'gzip'
    beforeEach(async function () {
      return GcsPersistor.sendStream(bucket, key, ReadStream, {
        contentType,
        contentEncoding,
      })
    })
    it('should send the metadata to GCS', function () {
      expect(GcsFile.createWriteStream).to.have.been.calledWith({
        metadata: { contentType, contentEncoding },
        resumable: false,
      })
    })
  })
  describe('when the upload fails', function () {
    let error
    beforeEach(async function () {
      // Make only the upload pipeline reject; others still resolve.
      StreamPromises.pipeline
        .withArgs(ReadStream, sinon.match.instanceOf(Transform), WriteStream)
        .rejects(genericError)
      try {
        await GcsPersistor.sendStream(bucket, key, ReadStream)
      } catch (err) {
        error = err
      }
    })
    it('throws a WriteError', function () {
      expect(error).to.be.an.instanceOf(Errors.WriteError)
    })
    it('wraps the error', function () {
      expect(error.cause).to.equal(genericError)
    })
  })
})
// sendFile: uploading from a local path via a read stream.
describe('sendFile', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      await GcsPersistor.sendFile(bucket, key, filename)
    })
    it('should create a read stream for the file', function () {
      expect(Fs.createReadStream).to.have.been.calledWith(filename)
    })
    it('should create a write stream', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.file).to.have.been.calledWith(key)
      expect(GcsFile.createWriteStream).to.have.been.called
    })
    it('should upload the stream via the meter', function () {
      expect(StreamPromises.pipeline).to.have.been.calledWith(
        ReadStream,
        sinon.match.instanceOf(Transform),
        WriteStream
      )
    })
  })
})
// copyObject: server-side copy between keys in the same bucket.
describe('copyObject', function () {
  const destinationFile = 'destFile'
  beforeEach(function () {
    // The destination key resolves to a distinct stub file object.
    GcsBucket.file.withArgs(destKey).returns(destinationFile)
  })
  describe('with valid parameters', function () {
    beforeEach(async function () {
      return GcsPersistor.copyObject(bucket, key, destKey)
    })
    it('should copy the object', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.file).to.have.been.calledWith(key)
      expect(GcsFile.copy).to.have.been.calledWith(destinationFile)
    })
  })
  describe('when the file does not exist', function () {
    let error
    beforeEach(async function () {
      GcsFile.copy = sinon.stub().rejects(GcsNotFoundError)
      try {
        await GcsPersistor.copyObject(bucket, key, destKey)
      } catch (err) {
        error = err
      }
    })
    it('should throw a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })
  })
})
// deleteObject: deletion is expected to be idempotent (404s swallowed).
describe('deleteObject', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      return GcsPersistor.deleteObject(bucket, key)
    })
    it('should delete the object', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.file).to.have.been.calledWith(key)
      expect(GcsFile.delete).to.have.been.called
    })
  })
  describe('when the file does not exist', function () {
    let error
    beforeEach(async function () {
      GcsFile.delete = sinon.stub().rejects(GcsNotFoundError)
      try {
        await GcsPersistor.deleteObject(bucket, key)
      } catch (err) {
        error = err
      }
    })
    it('should not throw an error', function () {
      expect(error).not.to.exist
    })
  })
})
// deleteDirectory: listing with pagination and deleting every match.
describe('deleteDirectory', function () {
  // The key must satisfy Settings.directoryKeyRegex (two ObjectIds).
  const directoryName = `${new ObjectId()}/${new ObjectId()}`
  const directoryPrefix = `${directoryName}/`
  describe('with valid parameters', function () {
    beforeEach(async function () {
      GcsBucket.getFiles = sinon.stub()
      // set up multiple paginated calls to getFiles
      GcsBucket.getFiles
        .withArgs({ prefix: directoryPrefix, autoPaginate: false })
        .resolves([['aaa', 'bbb'], 'call-1'])
      GcsBucket.getFiles
        .withArgs('call-1')
        .resolves([['ccc', 'ddd', 'eee'], 'call-2'])
      GcsBucket.getFiles.withArgs('call-2').resolves([['fff', 'ggg']])
      return GcsPersistor.deleteDirectory(bucket, directoryName)
    })
    it('should list the objects in the directory', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.getFiles).to.have.been.calledWith({
        prefix: directoryPrefix,
        autoPaginate: false,
      })
      expect(GcsBucket.getFiles).to.have.been.calledWith('call-1')
      expect(GcsBucket.getFiles).to.have.been.calledWith('call-2')
    })
    it('should delete the files', function () {
      // 2 + 3 + 2 files across the three pages stubbed above.
      expect(GcsFile.delete.callCount).to.equal(7)
    })
  })
  describe('when there is an error listing the objects', function () {
    let error
    beforeEach(async function () {
      GcsBucket.getFiles = sinon.stub().rejects(genericError)
      try {
        await GcsPersistor.deleteDirectory(bucket, directoryName)
      } catch (err) {
        error = err
      }
    })
    it('should generate a WriteError', function () {
      expect(error).to.be.an.instanceOf(Errors.WriteError)
    })
    it('should wrap the error', function () {
      expect(error.cause).to.equal(genericError)
    })
  })
})
// directorySize: sums the metadata sizes of all objects under a prefix.
describe('directorySize', function () {
  describe('with valid parameters', function () {
    let size
    beforeEach(async function () {
      size = await GcsPersistor.directorySize(bucket, key)
    })
    it('should list the objects in the directory', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.getFiles).to.have.been.calledWith({
        prefix: `${key}/`,
      })
    })
    it('should return the directory size', function () {
      // filesSize (33) is the sum of the stubbed sizes (11 + 22).
      expect(size).to.equal(filesSize)
    })
  })
  describe('when there are no files', function () {
    let size
    beforeEach(async function () {
      GcsBucket.getFiles.resolves([[]])
      size = await GcsPersistor.directorySize(bucket, key)
    })
    it('should list the objects in the directory', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.getFiles).to.have.been.calledWith({
        prefix: `${key}/`,
      })
    })
    it('should return zero', function () {
      expect(size).to.equal(0)
    })
  })
  describe('when there is an error listing the objects', function () {
    let error
    beforeEach(async function () {
      GcsBucket.getFiles.rejects(genericError)
      try {
        await GcsPersistor.directorySize(bucket, key)
      } catch (err) {
        error = err
      }
    })
    it('should generate a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })
    it('should wrap the error', function () {
      expect(error.cause).to.equal(genericError)
    })
  })
})
// checkIfObjectExists: thin wrapper around GcsFile.exists().
describe('checkIfObjectExists', function () {
  describe('when the file exists', function () {
    let exists
    beforeEach(async function () {
      exists = await GcsPersistor.checkIfObjectExists(bucket, key)
    })
    it('should ask the file if it exists', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.file).to.have.been.calledWith(key)
      expect(GcsFile.exists).to.have.been.called
    })
    it('should return that the file exists', function () {
      expect(exists).to.equal(true)
    })
  })
  describe('when the file does not exist', function () {
    let exists
    beforeEach(async function () {
      GcsFile.exists = sinon.stub().resolves([false])
      exists = await GcsPersistor.checkIfObjectExists(bucket, key)
    })
    it('should get the object header', function () {
      expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
      expect(GcsBucket.file).to.have.been.calledWith(key)
      expect(GcsFile.exists).to.have.been.called
    })
    it('should return that the file does not exist', function () {
      expect(exists).to.equal(false)
    })
  })
  describe('when there is an error', function () {
    let error
    beforeEach(async function () {
      GcsFile.exists = sinon.stub().rejects(genericError)
      try {
        await GcsPersistor.checkIfObjectExists(bucket, key)
      } catch (err) {
        error = err
      }
    })
    it('should generate a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })
    it('should wrap the error', function () {
      expect(error.cause).to.equal(genericError)
    })
  })
}) // end describe('checkIfObjectExists')
}) // end describe('GcsPersistorTests')

View File

@@ -0,0 +1,532 @@
const sinon = require('sinon')
const chai = require('chai')
const { expect } = chai
const modulePath = '../../src/MigrationPersistor.js'
const SandboxedModule = require('sandboxed-module')
const Errors = require('../../src/Errors')
// Not all methods are tested here, but a method with each type of wrapping has
// tests. Specifically, the following wrapping methods are tested here:
// getObjectStream: _wrapFallbackMethod
// sendStream: forward-to-primary
// deleteObject: _wrapMethodOnBothPersistors
// copyObject: copyFileWithFallback
describe('MigrationPersistorTests', function () {
const bucket = 'womBucket'
const fallbackBucket = 'bucKangaroo'
const key = 'monKey'
const destKey = 'donKey'
const genericError = new Error('guru meditation error')
const notFoundError = new Errors.NotFoundError('not found')
const size = 33
const md5 = 'ffffffff'
let Settings,
Logger,
Stream,
StreamPromises,
MigrationPersistor,
fileStream,
newPersistor
beforeEach(function () {
fileStream = {
name: 'fileStream',
on: sinon.stub().withArgs('end').yields(),
pipe: sinon.stub(),
}
newPersistor = function (hasFile) {
return {
sendFile: sinon.stub().resolves(),
sendStream: sinon.stub().resolves(),
getObjectStream: hasFile
? sinon.stub().resolves(fileStream)
: sinon.stub().rejects(notFoundError),
deleteDirectory: sinon.stub().resolves(),
getObjectSize: hasFile
? sinon.stub().resolves(size)
: sinon.stub().rejects(notFoundError),
deleteObject: sinon.stub().resolves(),
copyObject: hasFile
? sinon.stub().resolves()
: sinon.stub().rejects(notFoundError),
checkIfObjectExists: sinon.stub().resolves(hasFile),
directorySize: hasFile
? sinon.stub().resolves(size)
: sinon.stub().rejects(notFoundError),
getObjectMd5Hash: hasFile
? sinon.stub().resolves(md5)
: sinon.stub().rejects(notFoundError),
}
}
Settings = {
buckets: {
[bucket]: fallbackBucket,
},
}
Stream = {
PassThrough: sinon.stub(),
}
StreamPromises = {
pipeline: sinon.stub().resolves(),
}
Logger = {
warn: sinon.stub(),
}
MigrationPersistor = SandboxedModule.require(modulePath, {
requires: {
stream: Stream,
'stream/promises': StreamPromises,
'./Errors': Errors,
'@overleaf/logger': Logger,
},
globals: { console },
})
})
describe('getObjectStream', function () {
  const options = { wombat: 'potato' }

  // Builds a primary/fallback pair and wires them into a MigrationPersistor.
  // Returns the three objects so each test can inspect the stubs directly.
  function createMigration(primaryHasFile, fallbackHasFile) {
    const primary = newPersistor(primaryHasFile)
    const fallback = newPersistor(fallbackHasFile)
    const migration = new MigrationPersistor(primary, fallback, Settings)
    return [primary, fallback, migration]
  }

  describe('when the primary persistor has the file', function () {
    let primaryPersistor, fallbackPersistor, migrationPersistor, response

    beforeEach(async function () {
      ;[primaryPersistor, fallbackPersistor, migrationPersistor] =
        createMigration(true, false)
      response = await migrationPersistor.getObjectStream(bucket, key, options)
    })

    it('should return the file stream', function () {
      // Primary hit: the stream is passed through untouched.
      expect(response).to.equal(fileStream)
    })

    it('should fetch the file from the primary persistor, with the correct options', function () {
      expect(primaryPersistor.getObjectStream).to.have.been.calledWithExactly(
        bucket,
        key,
        options
      )
    })

    it('should not query the fallback persistor', function () {
      expect(fallbackPersistor.getObjectStream).not.to.have.been.called
    })
  })

  describe('when the fallback persistor has the file', function () {
    let primaryPersistor, fallbackPersistor, migrationPersistor, response

    beforeEach(async function () {
      ;[primaryPersistor, fallbackPersistor, migrationPersistor] =
        createMigration(false, true)
      response = await migrationPersistor.getObjectStream(bucket, key, options)
    })

    it('should return the file stream', function () {
      // On a fallback hit the data is re-piped, so a fresh PassThrough comes back.
      expect(response).to.be.an.instanceOf(Stream.PassThrough)
    })

    it('should fetch the file from the primary persistor with the correct options', function () {
      expect(primaryPersistor.getObjectStream).to.have.been.calledWithExactly(
        bucket,
        key,
        options
      )
    })

    it('should fetch the file from the fallback persistor with the fallback bucket with the correct options', function () {
      expect(fallbackPersistor.getObjectStream).to.have.been.calledWithExactly(
        fallbackBucket,
        key,
        options
      )
    })

    it('should create one read stream', function () {
      expect(fallbackPersistor.getObjectStream).to.have.been.calledOnce
    })

    it('should not send the file to the primary', function () {
      // copyOnMiss is off here, so no write-back happens.
      expect(primaryPersistor.sendStream).not.to.have.been.called
    })
  })

  describe('when the file should be copied to the primary', function () {
    let primaryPersistor,
      fallbackPersistor,
      migrationPersistor,
      returnedStream

    beforeEach(async function () {
      ;[primaryPersistor, fallbackPersistor, migrationPersistor] =
        createMigration(false, true)
      // copyOnMiss is read at call time, so setting it after construction works.
      Settings.copyOnMiss = true
      returnedStream = await migrationPersistor.getObjectStream(
        bucket,
        key,
        options
      )
    })

    it('should create one read stream', function () {
      expect(fallbackPersistor.getObjectStream).to.have.been.calledOnce
    })

    it('should get the md5 hash from the source', function () {
      expect(fallbackPersistor.getObjectMd5Hash).to.have.been.calledWith(
        fallbackBucket,
        key
      )
    })

    it('should send a stream to the primary', function () {
      expect(primaryPersistor.sendStream).to.have.been.calledWithExactly(
        bucket,
        key,
        sinon.match.instanceOf(Stream.PassThrough),
        { sourceMd5: md5 }
      )
    })

    it('should send a stream to the client', function () {
      expect(returnedStream).to.be.an.instanceOf(Stream.PassThrough)
    })
  })

  describe('when neither persistor has the file', function () {
    it('rejects with a NotFoundError', async function () {
      const [, , migrationPersistor] = createMigration(false, false)
      await expect(
        migrationPersistor.getObjectStream(bucket, key)
      ).to.eventually.be.rejected.and.be.an.instanceOf(Errors.NotFoundError)
    })
  })

  describe('when the primary persistor throws an unexpected error', function () {
    let primaryPersistor, fallbackPersistor, migrationPersistor, error

    beforeEach(async function () {
      ;[primaryPersistor, fallbackPersistor, migrationPersistor] =
        createMigration(false, true)
      // Replace the stub after wiring; the persistor holds a live reference.
      primaryPersistor.getObjectStream = sinon.stub().rejects(genericError)
      error = await migrationPersistor
        .getObjectStream(bucket, key, options)
        .then(() => undefined)
        .catch(err => err)
    })

    it('rejects with the error', function () {
      expect(error).to.equal(genericError)
    })

    it('does not call the fallback', function () {
      // Unexpected (non-404) primary errors must not trigger the fallback path.
      expect(fallbackPersistor.getObjectStream).not.to.have.been.called
    })
  })

  describe('when the fallback persistor throws an unexpected error', function () {
    let primaryPersistor, fallbackPersistor, migrationPersistor, error

    beforeEach(async function () {
      ;[primaryPersistor, fallbackPersistor, migrationPersistor] =
        createMigration(false, false)
      fallbackPersistor.getObjectStream = sinon.stub().rejects(genericError)
      error = await migrationPersistor
        .getObjectStream(bucket, key, options)
        .then(() => undefined)
        .catch(err => err)
    })

    it('rejects with the error', function () {
      expect(error).to.equal(genericError)
    })

    it('should have called the fallback', function () {
      expect(fallbackPersistor.getObjectStream).to.have.been.calledWith(
        fallbackBucket,
        key
      )
    })
  })
})
describe('sendStream', function () {
  // Writes always target the primary persistor; the fallback is read-only.
  let primaryPersistor, fallbackPersistor, migrationPersistor

  beforeEach(function () {
    // Neither side holds the file; sendStream should not care either way.
    primaryPersistor = newPersistor(false)
    fallbackPersistor = newPersistor(false)
    migrationPersistor = new MigrationPersistor(
      primaryPersistor,
      fallbackPersistor,
      Settings
    )
  })

  describe('when it works', function () {
    beforeEach(function () {
      // Returning the promise lets mocha wait for the upload to finish.
      return migrationPersistor.sendStream(bucket, key, fileStream)
    })

    it('should send the file to the primary persistor', function () {
      expect(primaryPersistor.sendStream).to.have.been.calledWithExactly(
        bucket,
        key,
        fileStream
      )
    })

    it('should not send the file to the fallback persistor', function () {
      expect(fallbackPersistor.sendStream).not.to.have.been.called
    })
  })

  describe('when the primary persistor throws an error', function () {
    it('returns the error', async function () {
      primaryPersistor.sendStream.rejects(notFoundError)
      await expect(
        migrationPersistor.sendStream(bucket, key, fileStream)
      ).to.eventually.be.rejected.and.be.an.instanceOf(Errors.NotFoundError)
    })
  })
})
describe('deleteObject', function () {
  // Deletes must be attempted on BOTH persistors so no stale copy survives
  // the migration, even when one side fails.
  let primaryPersistor, fallbackPersistor, migrationPersistor

  // Runs deleteObject and returns the rejection reason (or undefined on success).
  const attemptDelete = () =>
    migrationPersistor
      .deleteObject(bucket, key)
      .then(() => undefined)
      .catch(err => err)

  beforeEach(function () {
    primaryPersistor = newPersistor(false)
    fallbackPersistor = newPersistor(false)
    migrationPersistor = new MigrationPersistor(
      primaryPersistor,
      fallbackPersistor,
      Settings
    )
  })

  describe('when it works', function () {
    beforeEach(function () {
      return migrationPersistor.deleteObject(bucket, key)
    })

    it('should delete the file from the primary', function () {
      expect(primaryPersistor.deleteObject).to.have.been.calledWithExactly(
        bucket,
        key
      )
    })

    it('should delete the file from the fallback', function () {
      expect(fallbackPersistor.deleteObject).to.have.been.calledWithExactly(
        fallbackBucket,
        key
      )
    })
  })

  describe('when the primary persistor throws an error', function () {
    let error

    beforeEach(async function () {
      primaryPersistor.deleteObject.rejects(genericError)
      error = await attemptDelete()
    })

    it('should return the error', function () {
      expect(error).to.equal(genericError)
    })

    it('should delete the file from the primary', function () {
      expect(primaryPersistor.deleteObject).to.have.been.calledWithExactly(
        bucket,
        key
      )
    })

    it('should delete the file from the fallback', function () {
      // The fallback delete is still attempted despite the primary failure.
      expect(fallbackPersistor.deleteObject).to.have.been.calledWithExactly(
        fallbackBucket,
        key
      )
    })
  })

  describe('when the fallback persistor throws an error', function () {
    let error

    beforeEach(async function () {
      fallbackPersistor.deleteObject.rejects(genericError)
      error = await attemptDelete()
    })

    it('should return the error', function () {
      expect(error).to.equal(genericError)
    })

    it('should delete the file from the primary', function () {
      expect(primaryPersistor.deleteObject).to.have.been.calledWithExactly(
        bucket,
        key
      )
    })

    it('should delete the file from the fallback', function () {
      expect(fallbackPersistor.deleteObject).to.have.been.calledWithExactly(
        fallbackBucket,
        key
      )
    })
  })
})
describe('copyObject', function () {
  // copyObject should use the primary's native copy when possible, and fall
  // back to streaming the object up from the fallback bucket on a miss.
  describe('when the file exists on the primary', function () {
    let primaryPersistor, fallbackPersistor, migrationPersistor
    beforeEach(async function () {
      primaryPersistor = newPersistor(true)
      fallbackPersistor = newPersistor(false)
      migrationPersistor = new MigrationPersistor(
        primaryPersistor,
        fallbackPersistor,
        Settings
      )
      return migrationPersistor.copyObject(bucket, key, destKey)
    })
    it('should call copyObject to copy the file', function () {
      expect(primaryPersistor.copyObject).to.have.been.calledWithExactly(
        bucket,
        key,
        destKey
      )
    })
    it('should not try to read from the fallback', function () {
      // A primary-side copy needs no data transfer at all.
      expect(fallbackPersistor.getObjectStream).not.to.have.been.called
    })
  })
  describe('when the file does not exist on the primary', function () {
    let primaryPersistor, fallbackPersistor, migrationPersistor
    beforeEach(async function () {
      primaryPersistor = newPersistor(false)
      fallbackPersistor = newPersistor(true)
      migrationPersistor = new MigrationPersistor(
        primaryPersistor,
        fallbackPersistor,
        Settings
      )
      return migrationPersistor.copyObject(bucket, key, destKey)
    })
    it('should call copyObject to copy the file', function () {
      // The native copy is attempted first (and rejects with not-found).
      expect(primaryPersistor.copyObject).to.have.been.calledWithExactly(
        bucket,
        key,
        destKey
      )
    })
    it('should fetch the file from the fallback', function () {
      // Fixed: the assertion was previously negated
      // (`.not.to.have.been.calledWithExactly(fallbackBucket, key)`), which
      // contradicted the test title and passed vacuously because the stub is
      // invoked with an extra options argument. The fallback IS read here —
      // the md5 and sendStream assertions below depend on that stream.
      expect(fallbackPersistor.getObjectStream).to.have.been.calledWith(
        fallbackBucket,
        key
      )
    })
    it('should get the md5 hash from the source', function () {
      expect(fallbackPersistor.getObjectMd5Hash).to.have.been.calledWith(
        fallbackBucket,
        key
      )
    })
    it('should send the file to the primary', function () {
      expect(primaryPersistor.sendStream).to.have.been.calledWithExactly(
        bucket,
        destKey,
        sinon.match.instanceOf(Stream.PassThrough),
        { sourceMd5: md5 }
      )
    })
  })
  describe('when the file does not exist on the fallback', function () {
    let primaryPersistor, fallbackPersistor, migrationPersistor, error
    beforeEach(async function () {
      primaryPersistor = newPersistor(false)
      fallbackPersistor = newPersistor(false)
      migrationPersistor = new MigrationPersistor(
        primaryPersistor,
        fallbackPersistor,
        Settings
      )
      try {
        await migrationPersistor.copyObject(bucket, key, destKey)
      } catch (err) {
        error = err
      }
    })
    it('should call copyObject to copy the file', function () {
      expect(primaryPersistor.copyObject).to.have.been.calledWithExactly(
        bucket,
        key,
        destKey
      )
    })
    it('should fetch the file from the fallback', function () {
      // Fixed: same vacuous negated assertion as above — the fallback read is
      // attempted (and rejects), which is what produces the NotFoundError.
      expect(fallbackPersistor.getObjectStream).to.have.been.calledWith(
        fallbackBucket,
        key
      )
    })
    it('should return a not-found error', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })
  })
})
})

View File

@@ -0,0 +1,102 @@
const chai = require('chai')
const { expect } = chai
const SandboxedModule = require('sandboxed-module')
const StreamPromises = require('node:stream/promises')
const MODULE_PATH = '../../src/PersistorFactory.js'
describe('PersistorManager', function () {
  let PersistorFactory, FSPersistor, S3Persistor, Settings, GcsPersistor

  beforeEach(function () {
    // Stub persistor classes: each one reports its own identity through
    // wrappedMethod so the tests can tell which backend the factory picked.
    FSPersistor = class {
      constructor(settings) {
        // The factory is expected to hand its settings straight through.
        this.settings = settings
      }

      wrappedMethod() {
        return 'FSPersistor'
      }
    }
    S3Persistor = class {
      wrappedMethod() {
        return 'S3Persistor'
      }
    }
    GcsPersistor = class {
      wrappedMethod() {
        return 'GcsPersistor'
      }
    }
    Settings = {}

    PersistorFactory = SandboxedModule.require(MODULE_PATH, {
      requires: {
        './GcsPersistor': GcsPersistor,
        './S3Persistor': { S3Persistor },
        './FSPersistor': FSPersistor,
        '@overleaf/logger': {
          info() {},
          err() {},
        },
        'stream/promises': StreamPromises,
      },
    })
  })

  it('should implement the S3 wrapped method when S3 is configured', function () {
    Settings.backend = 's3'
    const persistor = PersistorFactory(Settings)
    expect(persistor).to.respondTo('wrappedMethod')
    expect(persistor.wrappedMethod()).to.equal('S3Persistor')
  })

  it("should implement the S3 wrapped method when 'aws-sdk' is configured", function () {
    // 'aws-sdk' is accepted as an alias for the S3 backend.
    Settings.backend = 'aws-sdk'
    const persistor = PersistorFactory(Settings)
    expect(persistor).to.respondTo('wrappedMethod')
    expect(persistor.wrappedMethod()).to.equal('S3Persistor')
  })

  it('should implement the FS wrapped method when FS is configured', function () {
    Settings.backend = 'fs'
    const persistor = PersistorFactory(Settings)
    expect(persistor).to.respondTo('wrappedMethod')
    expect(persistor.wrappedMethod()).to.equal('FSPersistor')
  })

  it('should forward useSubdirectories=true to FSPersistor', function () {
    Settings.backend = 'fs'
    Settings.useSubdirectories = true
    expect(PersistorFactory(Settings).settings.useSubdirectories).to.be.true
  })

  it('should forward useSubdirectories=false to FSPersistor', function () {
    Settings.backend = 'fs'
    Settings.useSubdirectories = false
    expect(PersistorFactory(Settings).settings.useSubdirectories).to.be.false
  })

  it('should throw an error when the backend is not configured', function () {
    try {
      PersistorFactory(Settings)
    } catch (err) {
      expect(err.message).to.equal('no backend specified - config incomplete')
      return
    }
    // Reached only if the factory did not throw: force a failure.
    expect('should have caught an error').not.to.exist
  })

  it('should throw an error when the backend is unknown', function () {
    Settings.backend = 'magic'
    try {
      PersistorFactory(Settings)
    } catch (err) {
      expect(err.message).to.equal('unknown backend')
      expect(err.info.backend).to.equal('magic')
      return
    }
    // Reached only if the factory did not throw: force a failure.
    expect('should have caught an error').not.to.exist
  })
})

File diff suppressed because it is too large Load Diff