fix: Add application/octet-stream as a valid mimetype for docx uploads (#2105)
* fix: Add application/octet-stream as a valid mimetype for docx uploads * fix: Include application/octet-stream in frontend filter fix: Add file size and file type guards * Validate .docx extension in files with application/octet-stream mimetype * refactor: Move MAXIMUM_IMPORT_SIZE to an optional environment config fix: Add file size check on server too Co-authored-by: Saumya Pandey <sp160899@gmail.com>
This commit is contained in:
parent
6ef8d9ddb3
commit
69802cc985
|
@ -94,6 +94,10 @@ FORCE_HTTPS=true
|
|||
# the maintainers
|
||||
ENABLE_UPDATES=true
|
||||
|
||||
# Override the maximum size of document imports, could be required if you have
|
||||
# especially large Word documents with embedded imagery
|
||||
MAXIMUM_IMPORT_SIZE=5120000
|
||||
|
||||
# You may enable or disable debugging categories to increase the noisiness of
|
||||
# logs. The default is a good balance
|
||||
DEBUG=cache,presenters,events,emails,mailer,utils,multiplayer,server,services
|
||||
|
|
|
@ -8,6 +8,7 @@ import naturalSort from "shared/utils/naturalSort";
|
|||
import BaseStore from "stores/BaseStore";
|
||||
import RootStore from "stores/RootStore";
|
||||
import Document from "models/Document";
|
||||
import env from "env";
|
||||
import type { FetchOptions, PaginationParams, SearchResult } from "types";
|
||||
import { client } from "utils/ApiClient";
|
||||
|
||||
|
@ -28,6 +29,7 @@ export default class DocumentsStore extends BaseStore<Document> {
|
|||
"text/html",
|
||||
"application/msword",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"application/octet-stream",
|
||||
];
|
||||
|
||||
constructor(rootStore: RootStore) {
|
||||
|
@ -529,6 +531,14 @@ export default class DocumentsStore extends BaseStore<Document> {
|
|||
collectionId: string,
|
||||
options: ImportOptions
|
||||
) => {
|
||||
// file.type can be an empty string sometimes
|
||||
if (file.type && !this.importFileTypes.includes(file.type)) {
|
||||
throw new Error(`The selected file type is not supported (${file.type})`);
|
||||
}
|
||||
if (file.size > env.MAXIMUM_IMPORT_SIZE) {
|
||||
throw new Error("The selected file was too large to import");
|
||||
}
|
||||
|
||||
const title = file.name.replace(/\.[^/.]+$/, "");
|
||||
const formData = new FormData();
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ import { subtractDate } from "../../shared/utils/date";
|
|||
import documentCreator from "../commands/documentCreator";
|
||||
import documentImporter from "../commands/documentImporter";
|
||||
import documentMover from "../commands/documentMover";
|
||||
import env from "../env";
|
||||
import {
|
||||
NotFoundError,
|
||||
InvalidRequestError,
|
||||
|
@ -1179,6 +1180,10 @@ router.post("documents.import", auth(), async (ctx) => {
|
|||
const file: any = Object.values(ctx.request.files)[0];
|
||||
ctx.assertPresent(file, "file is required");
|
||||
|
||||
if (file.size > env.MAXIMUM_IMPORT_SIZE) {
|
||||
throw new InvalidRequestError("The selected file was too large to import");
|
||||
}
|
||||
|
||||
ctx.assertUuid(collectionId, "collectionId must be an uuid");
|
||||
if (parentDocumentId) {
|
||||
ctx.assertUuid(parentDocumentId, "parentDocumentId must be an uuid");
|
||||
|
|
|
@ -44,6 +44,10 @@ const importMapping: ImportableFile[] = [
|
|||
type: "application/msword",
|
||||
getMarkdown: confluenceToMarkdown,
|
||||
},
|
||||
{
|
||||
type: "application/octet-stream",
|
||||
getMarkdown: docxToMarkdown,
|
||||
},
|
||||
{
|
||||
type:
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
|
@ -142,6 +146,12 @@ export default async function documentImporter({
|
|||
}): Promise<{ text: string, title: string }> {
|
||||
const fileInfo = importMapping.filter((item) => {
|
||||
if (item.type === file.type) {
|
||||
if (
|
||||
file.type === "application/octet-stream" &&
|
||||
path.extname(file.name) !== ".docx"
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (item.type === "text/markdown" && path.extname(file.name) === ".md") {
|
||||
|
|
|
@ -37,6 +37,75 @@ describe("documentImporter", () => {
|
|||
expect(response.title).toEqual("images");
|
||||
});
|
||||
|
||||
it("should convert Word Document to markdown for application/octet-stream mimetype", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "images.docx";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/octet-stream",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
ip,
|
||||
});
|
||||
|
||||
const attachments = await Attachment.count();
|
||||
expect(attachments).toEqual(1);
|
||||
|
||||
expect(response.text).toContain("This is a test document for images");
|
||||
expect(response.text).toContain("![](/api/attachments.redirect?id=");
|
||||
expect(response.title).toEqual("images");
|
||||
});
|
||||
|
||||
it("should error when a file with application/octet-stream mimetype doesn't have .docx extension", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "normal.docx.txt";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/octet-stream",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
|
||||
let error;
|
||||
try {
|
||||
await documentImporter({
|
||||
user,
|
||||
file,
|
||||
ip,
|
||||
});
|
||||
} catch (err) {
|
||||
error = err.message;
|
||||
}
|
||||
|
||||
expect(error).toEqual("File type application/octet-stream not supported");
|
||||
});
|
||||
|
||||
it("should convert Word Document on Windows to markdown", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "images.docx";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/octet-stream",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
ip,
|
||||
});
|
||||
|
||||
const attachments = await Attachment.count();
|
||||
expect(attachments).toEqual(1);
|
||||
|
||||
expect(response.text).toContain("This is a test document for images");
|
||||
expect(response.text).toContain("![](/api/attachments.redirect?id=");
|
||||
expect(response.title).toEqual("images");
|
||||
});
|
||||
|
||||
it("should convert HTML Document to markdown", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "webpage.html";
|
||||
|
@ -118,7 +187,7 @@ describe("documentImporter", () => {
|
|||
const name = "markdown.md";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/octet-stream",
|
||||
type: "application/lol",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ export default {
|
|||
TEAM_LOGO: process.env.TEAM_LOGO,
|
||||
SLACK_KEY: process.env.SLACK_KEY,
|
||||
SLACK_APP_ID: process.env.SLACK_APP_ID,
|
||||
MAXIMUM_IMPORT_SIZE: process.env.MAXIMUM_IMPORT_SIZE || 1024 * 1000 * 5,
|
||||
SUBDOMAINS_ENABLED: process.env.SUBDOMAINS_ENABLED === "true",
|
||||
GOOGLE_ANALYTICS_ID: process.env.GOOGLE_ANALYTICS_ID,
|
||||
RELEASE: process.env.SOURCE_COMMIT || process.env.SOURCE_VERSION || undefined,
|
||||
|
|
Binary file not shown.
Reference in New Issue