diff --git a/server/api/utils.js b/server/api/utils.js index 8947635b..3bd36029 100644 --- a/server/api/utils.js +++ b/server/api/utils.js @@ -4,44 +4,80 @@ import debug from "debug"; import Router from "koa-router"; import { AuthenticationError } from "../errors"; import { Document, Attachment } from "../models"; -import { Op } from "../sequelize"; +import { Op, sequelize } from "../sequelize"; +import parseAttachmentIds from "../utils/parseAttachmentIds"; const router = new Router(); const log = debug("utils"); router.post("utils.gc", async (ctx) => { - const { token } = ctx.body; + const { token, limit = 500 } = ctx.body; if (process.env.UTILS_SECRET !== token) { throw new AuthenticationError("Invalid secret token"); } - log("Permanently deleting documents older than 30 days…"); - - const where = { - deletedAt: { - [Op.lt]: subDays(new Date(), 30), - }, - }; + log(`Permanently destroying upto ${limit} documents older than 30 days…`); const documents = await Document.scope("withUnpublished").findAll({ - attributes: ["id"], - where, - }); - const documentIds = documents.map((d) => d.id); - - await Attachment.destroy({ + attributes: ["id", "teamId", "text"], where: { - documentId: documentIds, + deletedAt: { + [Op.lt]: subDays(new Date(), 30), + }, }, + paranoid: false, + limit, }); + const query = ` + SELECT COUNT(id) + FROM documents + WHERE "searchVector" @@ to_tsquery('english', :query) AND + "teamId" = :teamId AND + "id" != :documentId +`; + + for (const document of documents) { + const attachmentIds = parseAttachmentIds(document.text); + + for (const attachmentId of attachmentIds) { + const [{ count }] = await sequelize.query(query, { + type: sequelize.QueryTypes.SELECT, + replacements: { + documentId: document.id, + teamId: document.teamId, + query: attachmentId, + }, + }); + + if (parseInt(count) === 0) { + const attachment = await Attachment.findOne({ + where: { + teamId: document.teamId, + id: attachmentId, + }, + }); + + if (attachment) { + await attachment.destroy(); + + log(`Attachment ${attachmentId} deleted`); + } else { + log(`Unknown attachment ${attachmentId} ignored`); + } + } + } + } + await Document.scope("withUnpublished").destroy({ - where, + where: { + id: documents.map((document) => document.id), + }, force: true, }); - log(`Deleted ${documentIds.length} documents`); + log(`Destroyed ${documents.length} documents`); ctx.body = { success: true, diff --git a/server/api/utils.test.js b/server/api/utils.test.js index 940f5418..752e8e25 100644 --- a/server/api/utils.test.js +++ b/server/api/utils.test.js @@ -2,69 +2,141 @@ import subDays from "date-fns/sub_days"; import TestServer from "fetch-test-server"; import app from "../app"; -import { Document } from "../models"; -import { sequelize } from "../sequelize"; -import { buildDocument } from "../test/factories"; +import { Attachment, Document } from "../models"; +import { buildAttachment, buildDocument } from "../test/factories"; import { flushdb } from "../test/support"; const server = new TestServer(app.callback()); +jest.mock("aws-sdk", () => { + const mS3 = { deleteObject: jest.fn().mockReturnThis(), promise: jest.fn() }; + return { + S3: jest.fn(() => mS3), + Endpoint: jest.fn(), + }; +}); + beforeEach(() => flushdb()); afterAll(() => server.close()); describe("#utils.gc", () => { it("should destroy documents deleted more than 30 days ago", async () => { - const document = await buildDocument({ + await buildDocument({ publishedAt: new Date(), + deletedAt: subDays(new Date(), 60), }); - await sequelize.query( - `UPDATE documents SET "deletedAt" = '${subDays( - new Date(), - 60 - ).toISOString()}' WHERE id = '${document.id}'` - ); - const res = await server.post("/api/utils.gc", { body: { token: process.env.UTILS_SECRET, }, }); - const reloaded = await Document.scope().findOne({ - where: { - id: document.id, - }, - paranoid: false, - }); + expect(res.status).toEqual(200); - expect(reloaded).toBe(null); + expect(await Document.scope().count()).toEqual(0); + }); + + it("should destroy attachments no longer referenced", async () => { + const document = await buildDocument({ + publishedAt: subDays(new Date(), 90), + deletedAt: subDays(new Date(), 60), + }); + + const attachment = await buildAttachment({ + teamId: document.teamId, + documentId: document.id, + }); + + document.text = `![text](${attachment.redirectUrl})`; + await document.save(); + + const res = await server.post("/api/utils.gc", { + body: { + token: process.env.UTILS_SECRET, + }, + }); + + expect(res.status).toEqual(200); + expect(await Attachment.count()).toEqual(0); + expect(await Document.scope().count()).toEqual(0); + }); + + it("should handle unknown attachment ids", async () => { + const document = await buildDocument({ + publishedAt: subDays(new Date(), 90), + deletedAt: subDays(new Date(), 60), + }); + + const attachment = await buildAttachment({ + teamId: document.teamId, + documentId: document.id, + }); + + document.text = `![text](${attachment.redirectUrl})`; + await document.save(); + + // remove attachment so it no longer exists in the database, this is also + // representative of a corrupt attachment id in the doc or the regex returning + // an incorrect string + await attachment.destroy({ force: true }); + + const res = await server.post("/api/utils.gc", { + body: { + token: process.env.UTILS_SECRET, + }, + }); + + expect(res.status).toEqual(200); + expect(await Attachment.count()).toEqual(0); + expect(await Document.scope().count()).toEqual(0); + }); + + it("should not destroy attachments referenced in other documents", async () => { + const document1 = await buildDocument(); + + const document = await buildDocument({ + teamId: document1.teamId, + publishedAt: subDays(new Date(), 90), + deletedAt: subDays(new Date(), 60), + }); + + const attachment = await buildAttachment({ + teamId: document1.teamId, + documentId: document.id, + }); + + document1.text = `![text](${attachment.redirectUrl})`; + await document1.save(); + + document.text = `![text](${attachment.redirectUrl})`; + await document.save(); + + expect(await Attachment.count()).toEqual(1); + + const res = await server.post("/api/utils.gc", { + body: { + token: process.env.UTILS_SECRET, + }, + }); + + expect(res.status).toEqual(200); + expect(await Attachment.count()).toEqual(1); + expect(await Document.scope().count()).toEqual(1); }); it("should destroy draft documents deleted more than 30 days ago", async () => { - const document = await buildDocument({ + await buildDocument({ publishedAt: undefined, + deletedAt: subDays(new Date(), 60), }); - await sequelize.query( - `UPDATE documents SET "deletedAt" = '${subDays( - new Date(), - 60 - ).toISOString()}' WHERE id = '${document.id}'` - ); - const res = await server.post("/api/utils.gc", { body: { token: process.env.UTILS_SECRET, }, }); - const reloaded = await Document.scope().findOne({ - where: { - id: document.id, - }, - paranoid: false, - }); expect(res.status).toEqual(200); - expect(reloaded).toBe(null); + expect(await Document.scope().count()).toEqual(0); }); it("should require authentication", async () => { diff --git a/server/migrations/20201211080408-attachment-no-cascade.js b/server/migrations/20201211080408-attachment-no-cascade.js new file mode 100644 index 00000000..557f4932 --- /dev/null +++ b/server/migrations/20201211080408-attachment-no-cascade.js @@ -0,0 +1,37 @@ +const tableName = 'attachments'; + +// because of this issue in Sequelize the foreign key constraint may be named differently depending +// on when the previous migrations were ran https://github.com/sequelize/sequelize/pull/9890 +const constraintNames = ['attachments_documentId_fkey', 'attachments_foreign_idx']; + +module.exports = { + up: async (queryInterface, Sequelize) => { + let error; + for (const constraintName of constraintNames) { + try { + await queryInterface.sequelize.query(`alter table "${tableName}" drop constraint "${constraintName}"`) + return; + } catch (err) { + error = err; + } + } + throw error; + }, + + down: async (queryInterface, Sequelize) => { + let error; + for (const constraintName of constraintNames) { + try { + await queryInterface.sequelize.query( + `alter table "${tableName}" + add constraint "${constraintName}" foreign key("documentId") references "documents" ("id") + on delete cascade` + ); + return; + } catch (err) { + error = err; + } + } + throw error; + }, +}; \ No newline at end of file diff --git a/server/presenters/document.js b/server/presenters/document.js index fbf46566..fe48c06e 100644 --- a/server/presenters/document.js +++ b/server/presenters/document.js @@ -1,6 +1,7 @@ // @flow import { takeRight } from "lodash"; import { Attachment, Document, User } from "../models"; +import parseAttachmentIds from "../utils/parseAttachmentIds"; import { getSignedImageUrl } from "../utils/s3"; import presentUser from "./user"; @@ -8,13 +9,9 @@ type Options = { isPublic?: boolean, }; -const attachmentRegex = /!\[.*?\]\(\/api\/attachments\.redirect\?id=(?[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\)/gi; - // replaces attachments.redirect urls with signed/authenticated url equivalents -async function replaceImageAttachments(text) { - const attachmentIds = [...text.matchAll(attachmentRegex)].map( - (match) => match.groups && match.groups.id - ); +async function replaceImageAttachments(text: string) { + const attachmentIds = parseAttachmentIds(text); for (const id of attachmentIds) { const attachment = await Attachment.findByPk(id); diff --git a/server/utils/parseAttachmentIds.js b/server/utils/parseAttachmentIds.js new file mode 100644 index 00000000..1b937723 --- /dev/null +++ b/server/utils/parseAttachmentIds.js @@ -0,0 +1,8 @@ +// @flow +const attachmentRegex = /!\[.*?\]\(\/api\/attachments\.redirect\?id=(?[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\)/gi; + +export default function parseAttachmentIds(text: any): string[] { + return [...text.matchAll(attachmentRegex)].map( + (match) => match.groups && match.groups.id + ); +}