diff --git a/.env.sample b/.env.sample index b0c38a84..37b626a5 100644 --- a/.env.sample +++ b/.env.sample @@ -94,6 +94,10 @@ FORCE_HTTPS=true # the maintainers ENABLE_UPDATES=true +# Override the maximum size of document imports, could be required if you have +# especially large Word documents with embedded imagery +MAXIMUM_IMPORT_SIZE=5120000 + # You may enable or disable debugging categories to increase the noisiness of # logs. The default is a good balance DEBUG=cache,presenters,events,emails,mailer,utils,multiplayer,server,services diff --git a/app/stores/DocumentsStore.js b/app/stores/DocumentsStore.js index ebb0ad63..ae949211 100644 --- a/app/stores/DocumentsStore.js +++ b/app/stores/DocumentsStore.js @@ -8,6 +8,7 @@ import naturalSort from "shared/utils/naturalSort"; import BaseStore from "stores/BaseStore"; import RootStore from "stores/RootStore"; import Document from "models/Document"; +import env from "env"; import type { FetchOptions, PaginationParams, SearchResult } from "types"; import { client } from "utils/ApiClient"; @@ -28,6 +29,7 @@ export default class DocumentsStore extends BaseStore { "text/html", "application/msword", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/octet-stream", ]; constructor(rootStore: RootStore) { @@ -529,6 +531,14 @@ export default class DocumentsStore extends BaseStore { collectionId: string, options: ImportOptions ) => { + // file.type can be an empty string sometimes + if (file.type && !this.importFileTypes.includes(file.type)) { + throw new Error(`The selected file type is not supported (${file.type})`); + } + if (file.size > env.MAXIMUM_IMPORT_SIZE) { + throw new Error("The selected file was too large to import"); + } + const title = file.name.replace(/\.[^/.]+$/, ""); const formData = new FormData(); diff --git a/server/api/documents.js b/server/api/documents.js index 359420b8..232ddd56 100644 --- a/server/api/documents.js +++ b/server/api/documents.js @@ -5,6 +5,7 @@ 
import { subtractDate } from "../../shared/utils/date"; import documentCreator from "../commands/documentCreator"; import documentImporter from "../commands/documentImporter"; import documentMover from "../commands/documentMover"; +import env from "../env"; import { NotFoundError, InvalidRequestError, @@ -1179,6 +1180,10 @@ router.post("documents.import", auth(), async (ctx) => { const file: any = Object.values(ctx.request.files)[0]; ctx.assertPresent(file, "file is required"); + if (file.size > env.MAXIMUM_IMPORT_SIZE) { + throw new InvalidRequestError("The selected file was too large to import"); + } + ctx.assertUuid(collectionId, "collectionId must be an uuid"); if (parentDocumentId) { ctx.assertUuid(parentDocumentId, "parentDocumentId must be an uuid"); diff --git a/server/commands/documentImporter.js b/server/commands/documentImporter.js index da7d1c65..87b8a8cd 100644 --- a/server/commands/documentImporter.js +++ b/server/commands/documentImporter.js @@ -44,6 +44,10 @@ const importMapping: ImportableFile[] = [ type: "application/msword", getMarkdown: confluenceToMarkdown, }, + { + type: "application/octet-stream", + getMarkdown: docxToMarkdown, + }, { type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -142,6 +146,12 @@ export default async function documentImporter({ }): Promise<{ text: string, title: string }> { const fileInfo = importMapping.filter((item) => { if (item.type === file.type) { + if ( + file.type === "application/octet-stream" && + path.extname(file.name) !== ".docx" + ) { + return false; + } return true; } if (item.type === "text/markdown" && path.extname(file.name) === ".md") { diff --git a/server/commands/documentImporter.test.js b/server/commands/documentImporter.test.js index 2324c30d..fca2ecbd 100644 --- a/server/commands/documentImporter.test.js +++ b/server/commands/documentImporter.test.js @@ -37,6 +37,75 @@ describe("documentImporter", () => { expect(response.title).toEqual("images"); }); + it("should 
convert Word Document to markdown for application/octet-stream mimetype", async () => { + const user = await buildUser(); + const name = "images.docx"; + const file = new File({ + name, + type: "application/octet-stream", + path: path.resolve(__dirname, "..", "test", "fixtures", name), + }); + + const response = await documentImporter({ + user, + file, + ip, + }); + + const attachments = await Attachment.count(); + expect(attachments).toEqual(1); + + expect(response.text).toContain("This is a test document for images"); + expect(response.text).toContain("![](/api/attachments.redirect?id="); + expect(response.title).toEqual("images"); + }); + + it("should error when a file with application/octet-stream mimetype doesn't have .docx extension", async () => { + const user = await buildUser(); + const name = "normal.docx.txt"; + const file = new File({ + name, + type: "application/octet-stream", + path: path.resolve(__dirname, "..", "test", "fixtures", name), + }); + + let error; + try { + await documentImporter({ + user, + file, + ip, + }); + } catch (err) { + error = err.message; + } + + expect(error).toEqual("File type application/octet-stream not supported"); + }); + + it("should convert Word Document on Windows to markdown", async () => { + const user = await buildUser(); + const name = "images.docx"; + const file = new File({ + name, + type: "application/octet-stream", + path: path.resolve(__dirname, "..", "test", "fixtures", name), + }); + + const response = await documentImporter({ + user, + file, + ip, + }); + + const attachments = await Attachment.count(); + expect(attachments).toEqual(1); + + expect(response.text).toContain("This is a test document for images"); + expect(response.text).toContain("![](/api/attachments.redirect?id="); + expect(response.title).toEqual("images"); + }); + it("should convert HTML Document to markdown", async () => { const user = await buildUser(); const name = "webpage.html"; @@ -118,7 +187,7 @@ describe("documentImporter", () => { 
const name = "markdown.md"; const file = new File({ name, - type: "application/octet-stream", + type: "application/lol", path: path.resolve(__dirname, "..", "test", "fixtures", name), }); diff --git a/server/env.js b/server/env.js index 76621e57..204f6cc9 100644 --- a/server/env.js +++ b/server/env.js @@ -11,6 +11,7 @@ export default { TEAM_LOGO: process.env.TEAM_LOGO, SLACK_KEY: process.env.SLACK_KEY, SLACK_APP_ID: process.env.SLACK_APP_ID, + MAXIMUM_IMPORT_SIZE: process.env.MAXIMUM_IMPORT_SIZE || 1024 * 1000 * 5, SUBDOMAINS_ENABLED: process.env.SUBDOMAINS_ENABLED === "true", GOOGLE_ANALYTICS_ID: process.env.GOOGLE_ANALYTICS_ID, RELEASE: process.env.SOURCE_COMMIT || process.env.SOURCE_VERSION || undefined, diff --git a/server/test/fixtures/normal.docx.txt b/server/test/fixtures/normal.docx.txt new file mode 100644 index 00000000..2f297a1c Binary files /dev/null and b/server/test/fixtures/normal.docx.txt differ