fix: Attachments should not always be deleted with their original document (#1715)

* fix: Attachments should not be deleted when their original document is deleted when referenced elsewhere

* fix: Attachments deleted prematurely when docs are placed in trash

* mock

* restore hook, cascading delete was the issue
This commit is contained in:
Tom Moor 2020-12-14 19:55:22 -08:00 committed by GitHub
parent 3dbe54ac1e
commit e2e66954b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 207 additions and 57 deletions

View File

@ -4,44 +4,80 @@ import debug from "debug";
import Router from "koa-router";
import { AuthenticationError } from "../errors";
import { Document, Attachment } from "../models";
import { Op } from "../sequelize";
import { Op, sequelize } from "../sequelize";
import parseAttachmentIds from "../utils/parseAttachmentIds";
const router = new Router();
const log = debug("utils");
router.post("utils.gc", async (ctx) => {
const { token } = ctx.body;
const { token, limit = 500 } = ctx.body;
if (process.env.UTILS_SECRET !== token) {
throw new AuthenticationError("Invalid secret token");
}
log("Permanently deleting documents older than 30 days…");
const where = {
deletedAt: {
[Op.lt]: subDays(new Date(), 30),
},
};
log(`Permanently destroying upto ${limit} documents older than 30 days…`);
const documents = await Document.scope("withUnpublished").findAll({
attributes: ["id"],
where,
});
const documentIds = documents.map((d) => d.id);
await Attachment.destroy({
attributes: ["id", "teamId", "text"],
where: {
documentId: documentIds,
deletedAt: {
[Op.lt]: subDays(new Date(), 30),
},
},
paranoid: false,
limit,
});
const query = `
SELECT COUNT(id)
FROM documents
WHERE "searchVector" @@ to_tsquery('english', :query) AND
"teamId" = :teamId AND
"id" != :documentId
`;
for (const document of documents) {
const attachmentIds = parseAttachmentIds(document.text);
for (const attachmentId of attachmentIds) {
const [{ count }] = await sequelize.query(query, {
type: sequelize.QueryTypes.SELECT,
replacements: {
documentId: document.id,
teamId: document.teamId,
query: attachmentId,
},
});
if (parseInt(count) === 0) {
const attachment = await Attachment.findOne({
where: {
teamId: document.teamId,
id: attachmentId,
},
});
if (attachment) {
await attachment.destroy();
log(`Attachment ${attachmentId} deleted`);
} else {
log(`Unknown attachment ${attachmentId} ignored`);
}
}
}
}
await Document.scope("withUnpublished").destroy({
where,
where: {
id: documents.map((document) => document.id),
},
force: true,
});
log(`Deleted ${documentIds.length} documents`);
log(`Destroyed ${documents.length} documents`);
ctx.body = {
success: true,

View File

@ -2,69 +2,141 @@
import subDays from "date-fns/sub_days";
import TestServer from "fetch-test-server";
import app from "../app";
import { Document } from "../models";
import { sequelize } from "../sequelize";
import { buildDocument } from "../test/factories";
import { Attachment, Document } from "../models";
import { buildAttachment, buildDocument } from "../test/factories";
import { flushdb } from "../test/support";
const server = new TestServer(app.callback());
jest.mock("aws-sdk", () => {
const mS3 = { deleteObject: jest.fn().mockReturnThis(), promise: jest.fn() };
return {
S3: jest.fn(() => mS3),
Endpoint: jest.fn(),
};
});
beforeEach(() => flushdb());
afterAll(() => server.close());
describe("#utils.gc", () => {
it("should destroy documents deleted more than 30 days ago", async () => {
const document = await buildDocument({
await buildDocument({
publishedAt: new Date(),
deletedAt: subDays(new Date(), 60),
});
await sequelize.query(
`UPDATE documents SET "deletedAt" = '${subDays(
new Date(),
60
).toISOString()}' WHERE id = '${document.id}'`
);
const res = await server.post("/api/utils.gc", {
body: {
token: process.env.UTILS_SECRET,
},
});
const reloaded = await Document.scope().findOne({
where: {
id: document.id,
},
paranoid: false,
});
expect(res.status).toEqual(200);
expect(reloaded).toBe(null);
expect(await Document.scope().count()).toEqual(0);
});
it("should destroy attachments no longer referenced", async () => {
const document = await buildDocument({
publishedAt: subDays(new Date(), 90),
deletedAt: subDays(new Date(), 60),
});
const attachment = await buildAttachment({
teamId: document.teamId,
documentId: document.id,
});
document.text = `![text](${attachment.redirectUrl})`;
await document.save();
const res = await server.post("/api/utils.gc", {
body: {
token: process.env.UTILS_SECRET,
},
});
expect(res.status).toEqual(200);
expect(await Attachment.count()).toEqual(0);
expect(await Document.scope().count()).toEqual(0);
});
it("should handle unknown attachment ids", async () => {
const document = await buildDocument({
publishedAt: subDays(new Date(), 90),
deletedAt: subDays(new Date(), 60),
});
const attachment = await buildAttachment({
teamId: document.teamId,
documentId: document.id,
});
document.text = `![text](${attachment.redirectUrl})`;
await document.save();
// remove attachment so it no longer exists in the database, this is also
// representative of a corrupt attachment id in the doc or the regex returning
// an incorrect string
await attachment.destroy({ force: true });
const res = await server.post("/api/utils.gc", {
body: {
token: process.env.UTILS_SECRET,
},
});
expect(res.status).toEqual(200);
expect(await Attachment.count()).toEqual(0);
expect(await Document.scope().count()).toEqual(0);
});
it("should not destroy attachments referenced in other documents", async () => {
const document1 = await buildDocument();
const document = await buildDocument({
teamId: document1.teamId,
publishedAt: subDays(new Date(), 90),
deletedAt: subDays(new Date(), 60),
});
const attachment = await buildAttachment({
teamId: document1.teamId,
documentId: document.id,
});
document1.text = `![text](${attachment.redirectUrl})`;
await document1.save();
document.text = `![text](${attachment.redirectUrl})`;
await document.save();
expect(await Attachment.count()).toEqual(1);
const res = await server.post("/api/utils.gc", {
body: {
token: process.env.UTILS_SECRET,
},
});
expect(res.status).toEqual(200);
expect(await Attachment.count()).toEqual(1);
expect(await Document.scope().count()).toEqual(1);
});
it("should destroy draft documents deleted more than 30 days ago", async () => {
const document = await buildDocument({
await buildDocument({
publishedAt: undefined,
deletedAt: subDays(new Date(), 60),
});
await sequelize.query(
`UPDATE documents SET "deletedAt" = '${subDays(
new Date(),
60
).toISOString()}' WHERE id = '${document.id}'`
);
const res = await server.post("/api/utils.gc", {
body: {
token: process.env.UTILS_SECRET,
},
});
const reloaded = await Document.scope().findOne({
where: {
id: document.id,
},
paranoid: false,
});
expect(res.status).toEqual(200);
expect(reloaded).toBe(null);
expect(await Document.scope().count()).toEqual(0);
});
it("should require authentication", async () => {

View File

@ -0,0 +1,37 @@
const tableName = 'attachments';
// because of this issue in Sequelize the foreign key constraint may be named differently depending
// on when the previous migrations were ran https://github.com/sequelize/sequelize/pull/9890
const constraintNames = ['attachments_documentId_fkey', 'attachments_foreign_idx'];
module.exports = {
up: async (queryInterface, Sequelize) => {
let error;
for (const constraintName of constraintNames) {
try {
await queryInterface.sequelize.query(`alter table "${tableName}" drop constraint "${constraintName}"`)
return;
} catch (err) {
error = err;
}
}
throw error;
},
down: async (queryInterface, Sequelize) => {
let error;
for (const constraintName of constraintNames) {
try {
await queryInterface.sequelize.query(
`alter table "${tableName}"
add constraint "${constraintName}" foreign key("documentId") references "documents" ("id")
on delete cascade`
);
return;
} catch (err) {
error = err;
}
}
throw error;
},
};

View File

@ -1,6 +1,7 @@
// @flow
import { takeRight } from "lodash";
import { Attachment, Document, User } from "../models";
import parseAttachmentIds from "../utils/parseAttachmentIds";
import { getSignedImageUrl } from "../utils/s3";
import presentUser from "./user";
@ -8,13 +9,9 @@ type Options = {
isPublic?: boolean,
};
const attachmentRegex = /!\[.*?\]\(\/api\/attachments\.redirect\?id=(?<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\)/gi;
// replaces attachments.redirect urls with signed/authenticated url equivalents
async function replaceImageAttachments(text) {
const attachmentIds = [...text.matchAll(attachmentRegex)].map(
(match) => match.groups && match.groups.id
);
async function replaceImageAttachments(text: string) {
const attachmentIds = parseAttachmentIds(text);
for (const id of attachmentIds) {
const attachment = await Attachment.findByPk(id);

View File

@ -0,0 +1,8 @@
// @flow
const attachmentRegex = /!\[.*?\]\(\/api\/attachments\.redirect\?id=(?<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\)/gi;
export default function parseAttachmentIds(text: any): string[] {
return [...text.matchAll(attachmentRegex)].map(
(match) => match.groups && match.groups.id
);
}