From 69802cc9850a07fd986a9dcfda1b50549f81debd Mon Sep 17 00:00:00 2001 From: Tom Moor Date: Wed, 5 May 2021 18:48:37 -0700 Subject: [PATCH] fix: Add application/octet-stream as a valid mimetype for docx uploads (#2105) * fix: Add application/octet-stream as a valid mimetype for docx uploads * fix: Include application/octet-stream in frontend filter fix: Add file size and file type guards * Validate .docx extension in files with application/octet-stream mimetype * refactor: Move MAXIMUM_IMPORT_SIZE to an optional environment config fix: Add file size check on server too Co-authored-by: Saumya Pandey --- .env.sample | 4 ++ app/stores/DocumentsStore.js | 10 ++++ server/api/documents.js | 5 ++ server/commands/documentImporter.js | 10 ++++ server/commands/documentImporter.test.js | 71 ++++++++++++++++++++++- server/env.js | 1 + server/test/fixtures/normal.docx.txt | Bin 0 -> 6299 bytes 7 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 server/test/fixtures/normal.docx.txt diff --git a/.env.sample b/.env.sample index b0c38a84..37b626a5 100644 --- a/.env.sample +++ b/.env.sample @@ -94,6 +94,10 @@ FORCE_HTTPS=true # the maintainers ENABLE_UPDATES=true +# Override the maxium size of document imports, could be required if you have +# especially large Word documents with embedded imagery +MAXIMUM_IMPORT_SIZE=5120000 + # You may enable or disable debugging categories to increase the noisiness of # logs. The default is a good balance DEBUG=cache,presenters,events,emails,mailer,utils,multiplayer,server,services diff --git a/app/stores/DocumentsStore.js b/app/stores/DocumentsStore.js index ebb0ad63..ae949211 100644 --- a/app/stores/DocumentsStore.js +++ b/app/stores/DocumentsStore.js @@ -8,6 +8,7 @@ import naturalSort from "shared/utils/naturalSort"; import BaseStore from "stores/BaseStore"; import RootStore from "stores/RootStore"; import Document from "models/Document"; +import env from "env"; import type { FetchOptions, PaginationParams, SearchResult } from "types"; import { client } from "utils/ApiClient"; @@ -28,6 +29,7 @@ export default class DocumentsStore extends BaseStore { "text/html", "application/msword", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/octet-stream", ]; constructor(rootStore: RootStore) { @@ -529,6 +531,14 @@ export default class DocumentsStore extends BaseStore { collectionId: string, options: ImportOptions ) => { + // file.type can be an empty string sometimes + if (file.type && !this.importFileTypes.includes(file.type)) { + throw new Error(`The selected file type is not supported (${file.type})`); + } + if (file.size > env.MAXIMUM_IMPORT_SIZE) { + throw new Error("The selected file was too large to import"); + } + const title = file.name.replace(/\.[^/.]+$/, ""); const formData = new FormData(); diff --git a/server/api/documents.js b/server/api/documents.js index 359420b8..232ddd56 100644 --- a/server/api/documents.js +++ b/server/api/documents.js @@ -5,6 +5,7 @@ import { subtractDate } from "../../shared/utils/date"; import documentCreator from "../commands/documentCreator"; import documentImporter from "../commands/documentImporter"; import documentMover from "../commands/documentMover"; +import env from "../env"; import { NotFoundError, InvalidRequestError, @@ -1179,6 +1180,10 @@ router.post("documents.import", auth(), async (ctx) => { const file: any = Object.values(ctx.request.files)[0]; ctx.assertPresent(file, "file is required"); + if (file.size > env.MAXIMUM_IMPORT_SIZE) { + throw new InvalidRequestError("The selected file was too large to import"); + } + ctx.assertUuid(collectionId, "collectionId must be an uuid"); if (parentDocumentId) { ctx.assertUuid(parentDocumentId, "parentDocumentId must be an uuid"); diff --git a/server/commands/documentImporter.js b/server/commands/documentImporter.js index da7d1c65..87b8a8cd 100644 --- a/server/commands/documentImporter.js +++ b/server/commands/documentImporter.js @@ -44,6 +44,10 @@ const importMapping: ImportableFile[] = [ type: "application/msword", getMarkdown: confluenceToMarkdown, }, + { + type: "application/octet-stream", + getMarkdown: docxToMarkdown, + }, { type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -142,6 +146,12 @@ export default async function documentImporter({ }): Promise<{ text: string, title: string }> { const fileInfo = importMapping.filter((item) => { if (item.type === file.type) { + if ( + file.type === "application/octet-stream" && + path.extname(file.name) !== ".docx" + ) { + return false; + } return true; } if (item.type === "text/markdown" && path.extname(file.name) === ".md") { diff --git a/server/commands/documentImporter.test.js b/server/commands/documentImporter.test.js index 2324c30d..fca2ecbd 100644 --- a/server/commands/documentImporter.test.js +++ b/server/commands/documentImporter.test.js @@ -37,6 +37,75 @@ describe("documentImporter", () => { expect(response.title).toEqual("images"); }); + it("should convert Word Document to markdown for application/octet-stream mimetype", async () => { + const user = await buildUser(); + const name = "images.docx"; + const file = new File({ + name, + type: "application/octet-stream", + path: path.resolve(__dirname, "..", "test", "fixtures", name), + }); + + const response = await documentImporter({ + user, + file, + ip, + }); + + const attachments = await Attachment.count(); + expect(attachments).toEqual(1); + + expect(response.text).toContain("This is a test document for images"); + expect(response.text).toContain("![](/api/attachments.redirect?id="); + expect(response.title).toEqual("images"); + }); + + it("should error when a file with application/octet-stream mimetype doesn't have .docx extension", async () => { + const user = await buildUser(); + const name = "normal.docx.txt"; + const file = new File({ + name, + type: "application/octet-stream", + path: path.resolve(__dirname, "..", "test", "fixtures", name), + }); + + let error; + try { + await documentImporter({ + user, + file, + ip, + }); + } catch (err) { + error = err.message; + } + + expect(error).toEqual("File type application/octet-stream not supported"); + }); + + it("should convert Word Document on Windows to markdown", async () => { + const user = await buildUser(); + const name = "images.docx"; + const file = new File({ + name, + type: "application/octet-stream", + path: path.resolve(__dirname, "..", "test", "fixtures", name), + }); + + const response = await documentImporter({ + user, + file, + ip, + }); + + const attachments = await Attachment.count(); + expect(attachments).toEqual(1); + + expect(response.text).toContain("This is a test document for images"); + expect(response.text).toContain("![](/api/attachments.redirect?id="); + expect(response.title).toEqual("images"); + }); + it("should convert HTML Document to markdown", async () => { const user = await buildUser(); const name = "webpage.html"; @@ -118,7 +187,7 @@ describe("documentImporter", () => { const name = "markdown.md"; const file = new File({ name, - type: "application/octet-stream", + type: "application/lol", path: path.resolve(__dirname, "..", "test", "fixtures", name), }); diff --git a/server/env.js b/server/env.js index 76621e57..204f6cc9 100644 --- a/server/env.js +++ b/server/env.js @@ -11,6 +11,7 @@ export default { TEAM_LOGO: process.env.TEAM_LOGO, SLACK_KEY: process.env.SLACK_KEY, SLACK_APP_ID: process.env.SLACK_APP_ID, + MAXIMUM_IMPORT_SIZE: process.env.MAXIMUM_IMPORT_SIZE || 1024 * 1000 * 5, SUBDOMAINS_ENABLED: process.env.SUBDOMAINS_ENABLED === "true", GOOGLE_ANALYTICS_ID: process.env.GOOGLE_ANALYTICS_ID, RELEASE: process.env.SOURCE_COMMIT || process.env.SOURCE_VERSION || undefined, diff --git a/server/test/fixtures/normal.docx.txt b/server/test/fixtures/normal.docx.txt new file mode 100644 index 0000000000000000000000000000000000000000..2f297a1c9c91376a8f73ee9e5a2d60113ac9c11d GIT binary patch literal 6299 zcmaJ_by$?|wjH`t8iAp^B%~yzyFnU;9y%o?1ZhD^!2t<}l!l>0T3{$aT1vV>THu1` zoU1?2z3zGDd*+YX@AvGr>s?Dj83~yPfR2t1sO5XF1^CUdZhxD(TZ28gx$d4NiK@y- zQrIu{u83uJ=fLcdy@Mxo(k^tzXklsFyTgNMeD)jA9o1h0-?|8?*IP@AdBGz0g{CFtnM^ecU#$;ofXIxe##6_(36f|O< z`?S#Q+<`v@+tKgy8lcAQvkQcNbR=ZYvjeYc5|Wu#=XNdIyl` z*{ME%Q*E+IQvytRjDy#$yOTW51>KhF2@3+OA&A?lwxO#zW3Syu2(5A|z9@&&fY~vF zKJh&ZBHWSID=l|oMXW{c)sYI8-Z+JOHBy94muxrAN=>+}5oNh)XpD4%ckK0-YAF4ptP=pj-=HyHI6(#$g#1$hKz=%(c{#UH1~l{5f8^6F$s z`bQ{A@4uUPuDiP(q9*Llw&8<1_ylih;o8leM^H71e|eB%SAG9-=>SS8klJ8EjHUw$ z)pia!pC%y}N6jW#s7vXNNI6}?AaL#StjD-ymF&S&&#=y#hU*WU%lhV~AiFhUk=xc? zF%?XEXp)7TAkn<9J(#<4e;KVQLgywD_xV#IzDRR+QgJ+N!(MV?s$CCe1_hdYMu63N zza8RUXrZwOWS@-c4}Xq0oIhoKhH#e>x@qhb9RvU%`mZU$|2-uZuC8|($#?~KZs)-d z+Y5~DT~dNPmSlC0y~xc>h#mtp(@q!?>PIW*8WX z*XJp9szW0qsBHyB`BVf=Od3)%M$WtK5EO}!WJ**<)|J}C=VM9B`{XuojT9H7$|$!C zCr3;rRY9*I{<>Dofgp}e9@e-m29$L+XQ)UY<~hU5wb^3<;N>uAMml(g1f%?i!vT$x zc4}OEjb=WsKw!l}W+~@I7K=$Z!$L@I7SfN#E0DtFs9oPmc9%~m=`nk!=P+&p%Ma_; zaONZSc5p8`k+(n9?Ba8kOmaEa8=>-T)H`&%K}Akux3RzeD>`@M;U_x3;{w&0dHI$H zKX6YU*vc%KTne-M)biNmIJ;OuQKF3(^}|J81;*QFfjw{fP2aO^DxuHZ9}>3hj-{bC zOIG2!58-KGI2WPEm>AHJe#m(4bc3*iZw%a21UDnzlzwQL%ft~)F zM9(a&o+qURMywy(gtRY-`HFbFILT_^5KG}16m1i|PZBR;3jKi|&hM`I!bCRpq z@TszqsO~-WRfC?v!wQpYXppA+^u&D5G-Uw?S$9TDI&mM-a`&@R7hQh|`CWZiD6!l=H)Fs`(0NDmNIuHksMx>20aKoeCxnpU=P{uJX;X zr44VzjpEKb+*vbjE}ObJ2)~i9Sd!1h8LsM-lgweLqZi!{uPQ9HsP5PTbhv)k|CH2H zr(u}5mQXXGNC89_9ksqszku|GRK}lpQ0qlHN!_yRBFM)fTzNk`YHg6&gZqU7%`&~m zf-B_w)sBO$e0u8{l9O*W=zsa zb-#UAMOj4nLvJYus~@qDxKlY!^Uvo5_o11hUeS67yCFaxR~Cpa1mgo}mi!uuL?*GK zqrK4`CYD!A8eF)n%DYIYwuWxil_{yC_(3tN#GW}Y;g$5!^L>tQ?{KaSlo!e3&EEz# zVqtG+eeo2z@vJBE3HmwYeuSq9P}~-kAy%GBz@nA*lgU!jbo#aY&iJ4A(LR;%_JA|CQS_|dAN@Wz$|`D+-g3!o3v>4!tdvmIkrk-#IEF`=lbZ(TBa*jid!GKfz5TSX|*nHjWiS zWYq42w`rH2aohVN`mUpfRKz|Ve}P~~M#<2a8N^s}pp$09lIqA@Adptn*hFVsmi}0e zYD4#k!%C|}lWQH$rVN}yrZvFDiZEO(-)=AF{k%fAM^da33I`xzp;kWbD#&L#7|vje zD`!%Z#DqTyH)Os#da|{il4G3nhO38AJl9}GcW~9Tb>Vdgy1g+O!5OFi5@K44kpaVa z{3AG4pLEEkIP{ybB<$dT{(%}VB*y*FMH7jjD>)6tt6=&EI(gyh3H5l6z_fl3CAtoh z{X{Yc6XZlBk9>irg}zaQ)B{-@Yby+jNKHGC2fcaNRi9a(a&Z_~4E)}&i+`~qVu#kE zdt3s@HX{Xoj}Lxe_m~`l-lyh5lgwAOd*2+l_;{{j>~z{2_*ISm>tNvmPM`1P<>$E# zJ`4G)FVjl%qk0$eZPbF6cGT-tLyEv33H`m52xf8;JJN3F&hH`I$ z$t$L=fbo4C%-FlXz!SN0APppD4TA-V7O?9tV_L+STKAaHdN{XHp zr@NF6d&x*yU9ax7V?-@_#C@6e^nR)72)#XoNPqfA=*7kD}OMn0-^36%$$iv%1nc6&wKJz?|m0Z9iqy3B;kX8 z99}cp?oEv%FuyOQF(f?0)ojvbd=N8#w<**uC`^-)0Dx(dzcn=Re;V4;&f3YE`{$GQ zXImR8yUYmQZ{5?zZ>X&VvwMY#$Zk*We96Nhtd)tceyj{;(FATVwBYQxwFO(um3Lrn z!#c`KB(b(f1Jl7&Fx++hNi)3uWYx8C@Ye8!dD7V4&9j8$GE>o5vT0^%E$Uj%HFvl) z!%3a`a+cE^)pzci4~|jk2{qr8_C(rB*Zz+#~F$A|NHu;~@^& z!3qEv)7eZo))0%f^TRY^=|^F^v2tVdb_tc$U-Ae1v0E$|KcmD)nQ$i+XoC!bh7^Oo zJ@!tMynIu`nXr8H96?xz#L|GxcNpd{)l)f9HqA(&t!yXI@a%+j9TPFAEOR8v+<;Kz zYIQROo*49Ktu=D%*l#XV`>-D{dDNz5i!ifY3mHt#N9~J+CvZJjI6iU5K>Yax;o2#n6KX zjG)db9Qr1d{V)6oHY`T{r4Vtwes|7xyt;KHAQqP%znnv20>KGjqAL~`MHOX1Ahd@0 zJdv3+Rm)JpFO;dsDKOc*Ad>vsaSV-VtM7}K)Z!9;XGbHs^742=4>ac5+_JQrFLsYw zGXp}e15S2|d(6@gw0dq%mh)S$55vg|JQw=QGOw?HZ1k5&TrOYspq{;Glc5sNB6zhf zC@d)|hS-6P*sB4Fv&6S7(gt^ibn>d%W3OjovaiqF8z@JZpw-3udT$s}rEO~&jX2y# zhF5P4&_pgryg;({p(A`G?`s%rGlE{lkpaCQq(Wm11Tn_(rl%iCmPuD}EQ;i6| zdzY$}X0p<{<~Ej!23>Iz2L_A11-vicyle{BwbOF)0QGa=)!VGMQy?0Me0TB4mItR_f{ZRduZyKqoR`D#L^v*}^X+b0avd4- z1PKJpt#>b`n9p+@HQyacy|ha>MjvjtGB#_C zs{fclx=59y1H$IdADBWo^u4SRK88nX6`Y+0QHzo5KgX*nUz1UmqhBgyWUsP6h>a&E zXv_`@egWq2=2@Q`sOrxr6bTFzAxM~k(;XCr8;y1UV1Juq!=o&14Dwfw3B(=hhT=}C zXb+j@Tnek&8)vcZHaaS$*Sm^4olQY8yxl@H({r6-u!!B)r4<#LTY>{6l2H6;Sz>e; zJ4Ywrg{7dNFSYK}Pm{2uYh;iZSUg3DF{NR;7Hb#-kcO5=YUIR$c{=(`=X(j;_XHJ+ zeHd%zr5CH;620toJ(8+Y9T&j9a5?x0eOel4%T(|*6<$^-5a`5i5XeWYR)yfWv}qHw zwL6+&`>a@XAvI)_+iq6J7wj!YwNDi(&UX3~Im)+CH`%OS2sy(j-Ho+4Xp3m<3E^Ih zn5e^`Gl$#B!P4L!cw)iv)K`BdhS9b@kY(cON1LJJ^XkOz-0f0|be_Jqw|5-3LF3z4h7M9?jQbn_-lv>9vJv+ke_t~sQ z3(H~BW$eT_mh4A@4dzvV%IM1!d-vQv>q#fbND)8uw-w_PH7yl~5L2NFW?~Azv(^II zn~{qU9Xg!`+_0d?XJLL|=J*!Op}#U$IpnDRO4d&eavBb{1baXY9^y(| zrgSmOkETe_H*P?2&akGp5^bd|L99Q_5forT8K-XAD*E1+5W|i(52ucov;PpH_FXtc zIoqe3HU{TpXXz5Xc$-BS#)qP=CpZMR$e?^4iDuIFLx>1pq5`}0z6tNgvO=;@bVJ9Ka%uI%&M#}|e+r50?cS+u`6q@#^Ynp=; z%E}|+5Fj?bx?J8EB#465i7FCp@Rnc-{Lp7B+mc9=gtURyU0`gLBazKk4Yhko{(5%#u`3Z_uS?B4XMM}CtirrPiHpJhGc_EF@QdvmYs|0K zmJZh0gvw4^RA05yraaGGYpR1D$=1~6vbeY&H64^9BBzp`@Oa737c9p6UGJ_^-d%(k zc=6oPZ(+>2{r>+j-n*?+BbnP8y;Tv+be_6edzjo^gPN23?+R7+^eVd7D7nr$1Ycs;V&Bh;2BKRsX&3|ZCaVY?%=~aok1|30316d^ougWQy z5Py4mkYJiL<(bS4BMAs?o+bs6hc;)A9Wk=3@>xk)LHX4xf@@22QV)Fe5Ss7Dm$5t) z3C6XLqz0wWOMTMwQrLZXR=1J^VW@R7drTZ=Xrw%dm}>uF)HLJyG!1@f&E#9I9j1Pb zsqbgd>gy%{(D89<$)YzL;(Pi{hU>5SQO3j;8#0Iz2GX$NRV(u`Vat(rR~}ic%AXJ(>0vUGcgkYn%U4Ro-kWpxA(_0UiE$n_ z%Vu7A=}eaP{PfJxGSoztw-9Y@-@@p*#0^o&9#DKi5n;#6{SZG64y=6K7TWx!^{hKW zdhbKT1TJmXDmTfzun&E#`}F;h&+2y>QWEHfP6btmopGUzQ!{$KZh3w?ij|1Q(qNmak>9`fIX zt3OZrotOVJ4BlI`{!8Tf)BSfAa7W&MSv1