fix: Attachments should not always be deleted with their original document (#1715)

* fix: Attachments should not be deleted when their original document is deleted when referenced elsewhere

* fix: Attachments deleted prematurely when docs are placed in trash

* mock

* restore hook, cascading delete was the issue
This commit is contained in:
Tom Moor
2020-12-14 19:55:22 -08:00
committed by GitHub
parent 3dbe54ac1e
commit e2e66954b5
5 changed files with 207 additions and 57 deletions

View File

@ -4,44 +4,80 @@ import debug from "debug";
import Router from "koa-router"; import Router from "koa-router";
import { AuthenticationError } from "../errors"; import { AuthenticationError } from "../errors";
import { Document, Attachment } from "../models"; import { Document, Attachment } from "../models";
import { Op } from "../sequelize"; import { Op, sequelize } from "../sequelize";
import parseAttachmentIds from "../utils/parseAttachmentIds";
const router = new Router(); const router = new Router();
const log = debug("utils"); const log = debug("utils");
router.post("utils.gc", async (ctx) => { router.post("utils.gc", async (ctx) => {
const { token } = ctx.body; const { token, limit = 500 } = ctx.body;
if (process.env.UTILS_SECRET !== token) { if (process.env.UTILS_SECRET !== token) {
throw new AuthenticationError("Invalid secret token"); throw new AuthenticationError("Invalid secret token");
} }
log("Permanently deleting documents older than 30 days…"); log(`Permanently destroying upto ${limit} documents older than 30 days…`);
const where = {
deletedAt: {
[Op.lt]: subDays(new Date(), 30),
},
};
const documents = await Document.scope("withUnpublished").findAll({ const documents = await Document.scope("withUnpublished").findAll({
attributes: ["id"], attributes: ["id", "teamId", "text"],
where,
});
const documentIds = documents.map((d) => d.id);
await Attachment.destroy({
where: { where: {
documentId: documentIds, deletedAt: {
[Op.lt]: subDays(new Date(), 30),
},
}, },
paranoid: false,
limit,
}); });
const query = `
SELECT COUNT(id)
FROM documents
WHERE "searchVector" @@ to_tsquery('english', :query) AND
"teamId" = :teamId AND
"id" != :documentId
`;
for (const document of documents) {
const attachmentIds = parseAttachmentIds(document.text);
for (const attachmentId of attachmentIds) {
const [{ count }] = await sequelize.query(query, {
type: sequelize.QueryTypes.SELECT,
replacements: {
documentId: document.id,
teamId: document.teamId,
query: attachmentId,
},
});
if (parseInt(count) === 0) {
const attachment = await Attachment.findOne({
where: {
teamId: document.teamId,
id: attachmentId,
},
});
if (attachment) {
await attachment.destroy();
log(`Attachment ${attachmentId} deleted`);
} else {
log(`Unknown attachment ${attachmentId} ignored`);
}
}
}
}
await Document.scope("withUnpublished").destroy({ await Document.scope("withUnpublished").destroy({
where, where: {
id: documents.map((document) => document.id),
},
force: true, force: true,
}); });
log(`Deleted ${documentIds.length} documents`); log(`Destroyed ${documents.length} documents`);
ctx.body = { ctx.body = {
success: true, success: true,

View File

@ -2,69 +2,141 @@
import subDays from "date-fns/sub_days"; import subDays from "date-fns/sub_days";
import TestServer from "fetch-test-server"; import TestServer from "fetch-test-server";
import app from "../app"; import app from "../app";
import { Document } from "../models"; import { Attachment, Document } from "../models";
import { sequelize } from "../sequelize"; import { buildAttachment, buildDocument } from "../test/factories";
import { buildDocument } from "../test/factories";
import { flushdb } from "../test/support"; import { flushdb } from "../test/support";
const server = new TestServer(app.callback()); const server = new TestServer(app.callback());
jest.mock("aws-sdk", () => {
const mS3 = { deleteObject: jest.fn().mockReturnThis(), promise: jest.fn() };
return {
S3: jest.fn(() => mS3),
Endpoint: jest.fn(),
};
});
beforeEach(() => flushdb()); beforeEach(() => flushdb());
afterAll(() => server.close()); afterAll(() => server.close());
describe("#utils.gc", () => { describe("#utils.gc", () => {
it("should destroy documents deleted more than 30 days ago", async () => { it("should destroy documents deleted more than 30 days ago", async () => {
const document = await buildDocument({ await buildDocument({
publishedAt: new Date(), publishedAt: new Date(),
deletedAt: subDays(new Date(), 60),
}); });
await sequelize.query(
`UPDATE documents SET "deletedAt" = '${subDays(
new Date(),
60
).toISOString()}' WHERE id = '${document.id}'`
);
const res = await server.post("/api/utils.gc", { const res = await server.post("/api/utils.gc", {
body: { body: {
token: process.env.UTILS_SECRET, token: process.env.UTILS_SECRET,
}, },
}); });
const reloaded = await Document.scope().findOne({
where: {
id: document.id,
},
paranoid: false,
});
expect(res.status).toEqual(200); expect(res.status).toEqual(200);
expect(reloaded).toBe(null); expect(await Document.scope().count()).toEqual(0);
});
it("should destroy attachments no longer referenced", async () => {
const document = await buildDocument({
publishedAt: subDays(new Date(), 90),
deletedAt: subDays(new Date(), 60),
});
const attachment = await buildAttachment({
teamId: document.teamId,
documentId: document.id,
});
document.text = `![text](${attachment.redirectUrl})`;
await document.save();
const res = await server.post("/api/utils.gc", {
body: {
token: process.env.UTILS_SECRET,
},
});
expect(res.status).toEqual(200);
expect(await Attachment.count()).toEqual(0);
expect(await Document.scope().count()).toEqual(0);
});
it("should handle unknown attachment ids", async () => {
const document = await buildDocument({
publishedAt: subDays(new Date(), 90),
deletedAt: subDays(new Date(), 60),
});
const attachment = await buildAttachment({
teamId: document.teamId,
documentId: document.id,
});
document.text = `![text](${attachment.redirectUrl})`;
await document.save();
// remove attachment so it no longer exists in the database, this is also
// representative of a corrupt attachment id in the doc or the regex returning
// an incorrect string
await attachment.destroy({ force: true });
const res = await server.post("/api/utils.gc", {
body: {
token: process.env.UTILS_SECRET,
},
});
expect(res.status).toEqual(200);
expect(await Attachment.count()).toEqual(0);
expect(await Document.scope().count()).toEqual(0);
});
it("should not destroy attachments referenced in other documents", async () => {
const document1 = await buildDocument();
const document = await buildDocument({
teamId: document1.teamId,
publishedAt: subDays(new Date(), 90),
deletedAt: subDays(new Date(), 60),
});
const attachment = await buildAttachment({
teamId: document1.teamId,
documentId: document.id,
});
document1.text = `![text](${attachment.redirectUrl})`;
await document1.save();
document.text = `![text](${attachment.redirectUrl})`;
await document.save();
expect(await Attachment.count()).toEqual(1);
const res = await server.post("/api/utils.gc", {
body: {
token: process.env.UTILS_SECRET,
},
});
expect(res.status).toEqual(200);
expect(await Attachment.count()).toEqual(1);
expect(await Document.scope().count()).toEqual(1);
}); });
it("should destroy draft documents deleted more than 30 days ago", async () => { it("should destroy draft documents deleted more than 30 days ago", async () => {
const document = await buildDocument({ await buildDocument({
publishedAt: undefined, publishedAt: undefined,
deletedAt: subDays(new Date(), 60),
}); });
await sequelize.query(
`UPDATE documents SET "deletedAt" = '${subDays(
new Date(),
60
).toISOString()}' WHERE id = '${document.id}'`
);
const res = await server.post("/api/utils.gc", { const res = await server.post("/api/utils.gc", {
body: { body: {
token: process.env.UTILS_SECRET, token: process.env.UTILS_SECRET,
}, },
}); });
const reloaded = await Document.scope().findOne({
where: {
id: document.id,
},
paranoid: false,
});
expect(res.status).toEqual(200); expect(res.status).toEqual(200);
expect(reloaded).toBe(null); expect(await Document.scope().count()).toEqual(0);
}); });
it("should require authentication", async () => { it("should require authentication", async () => {

View File

@ -0,0 +1,37 @@
const tableName = 'attachments';
// because of this issue in Sequelize the foreign key constraint may be named differently depending
// on when the previous migrations were ran https://github.com/sequelize/sequelize/pull/9890
const constraintNames = ['attachments_documentId_fkey', 'attachments_foreign_idx'];
module.exports = {
up: async (queryInterface, Sequelize) => {
let error;
for (const constraintName of constraintNames) {
try {
await queryInterface.sequelize.query(`alter table "${tableName}" drop constraint "${constraintName}"`)
return;
} catch (err) {
error = err;
}
}
throw error;
},
down: async (queryInterface, Sequelize) => {
let error;
for (const constraintName of constraintNames) {
try {
await queryInterface.sequelize.query(
`alter table "${tableName}"
add constraint "${constraintName}" foreign key("documentId") references "documents" ("id")
on delete cascade`
);
return;
} catch (err) {
error = err;
}
}
throw error;
},
};

View File

@ -1,6 +1,7 @@
// @flow // @flow
import { takeRight } from "lodash"; import { takeRight } from "lodash";
import { Attachment, Document, User } from "../models"; import { Attachment, Document, User } from "../models";
import parseAttachmentIds from "../utils/parseAttachmentIds";
import { getSignedImageUrl } from "../utils/s3"; import { getSignedImageUrl } from "../utils/s3";
import presentUser from "./user"; import presentUser from "./user";
@ -8,13 +9,9 @@ type Options = {
isPublic?: boolean, isPublic?: boolean,
}; };
const attachmentRegex = /!\[.*?\]\(\/api\/attachments\.redirect\?id=(?<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\)/gi;
// replaces attachments.redirect urls with signed/authenticated url equivalents // replaces attachments.redirect urls with signed/authenticated url equivalents
async function replaceImageAttachments(text) { async function replaceImageAttachments(text: string) {
const attachmentIds = [...text.matchAll(attachmentRegex)].map( const attachmentIds = parseAttachmentIds(text);
(match) => match.groups && match.groups.id
);
for (const id of attachmentIds) { for (const id of attachmentIds) {
const attachment = await Attachment.findByPk(id); const attachment = await Attachment.findByPk(id);

View File

@ -0,0 +1,8 @@
// @flow
const attachmentRegex = /!\[.*?\]\(\/api\/attachments\.redirect\?id=(?<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\)/gi;
export default function parseAttachmentIds(text: any): string[] {
return [...text.matchAll(attachmentRegex)].map(
(match) => match.groups && match.groups.id
);
}