From 866fa5ebbaa22e71b751a480adb5d5bc2c1f2e68 Mon Sep 17 00:00:00 2001
From: Linnea
Date: Mon, 22 Sep 2025 20:17:10 -0700
Subject: [PATCH 1/4] cleanup

---
 README.md                      |  17 ++--
 experiments/aemp.org           |   8 ++
 experiments/gre_apartments.ods | Bin 23441 -> 23880 bytes
 processors/corp_owners.py      | 174 ++++++++++++++++++++++++---------
 processors/gre-llc.py          |  34 +++----
 processors/merge.py            |   5 +-
 processors/parcel_owners.py    |  46 +++------
 processors/scrape.py           |  42 ++------
 requirements-conda.txt         |  69 +++++++++++++
 9 files changed, 249 insertions(+), 146 deletions(-)
 create mode 100644 requirements-conda.txt

diff --git a/README.md b/README.md
index 88926d4..82fb549 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,10 @@
 # aemp-seattle
-Initial repository for building up Seattle database for anti-eviction mapping.
+Initial repository for building up a Seattle database for anti-eviction mapping.
 
-Modelled off the [evictorbase pipeline code](https://github.com/antievictionmappingproject/eb-data-pipeline).
+Modelled off the [evictorbase pipeline code](https://github.com/antievictionmappingproject/eb-data-pipeline).
 
-Relevant but not 1-1 walkthrough of how to programmatically find building owners: [350 Seattle BEPS Repo](https://github.com/BenBagBag/350_seattle_building_ownership/blob/main/How%20to%20Find%20Building%20Owners.ipynb).
+A relevant (though not 1-1) walkthrough of how to programmatically find building owners: [350 Seattle BEPS Repo](https://github.com/BenBagBag/350_seattle_building_ownership/blob/main/How%20to%20Find%20Building%20Owners.ipynb).
 
 [AEMP Seattle Community Agreements](https://docs.google.com/document/d/1ZMeRmPWmhxynBXZ-aV6R2sQBktjNYRL9Xw9PHpkVpJE/edit?usp=drive_link)
@@ -18,8 +18,9 @@
 - `to_load/`: directory for files that can be loaded directly into the PostgreSQL database
 - `experiments/`: directory for Jupyter notebooks for data exploration and script development
 
-## Data Inputs:
-[eRealProperty](https://kingcounty.gov/en/dept/kcit/data-information-services/gis-center/property-research): King County assessor data for finding the owner of a given parcel.
-[Washington State Corporations and Charities Filing Database (CCFS)](https://kingcounty.gov/en/dept/kcit/data-information-services/gis-center/property-research): For looking up a parcel owner name and finding the related business listing and related info.
-
-TODO: Find a good source for eviction filing data. Those with access can refer to the [potential data source list](https://docs.google.com/spreadsheets/d/1Ew0UrZvP-S74velkWSKaiSGBYcxIAoRH6IGpEzNWX6s/edit?gid=0#gid=0) to find new data sources.
\ No newline at end of file
+## Data Inputs:
+[King County Assessor:](https://info.kingcounty.gov/assessor/DataDownload/default.aspx) Download records of all apartment complexes in King County.
+[eRealProperty](https://kingcounty.gov/en/dept/kcit/data-information-services/gis-center/property-research): King County assessor data for finding the owner of a given parcel.
+[Washington State Corporations and Charities Filing Database (CCFS)](https://ccfs.sos.wa.gov/): For looking up a parcel owner name and finding the related business listing and related info.
+
+TODO: Find a good source for eviction filing data. Those with access can refer to the [potential data source list](https://docs.google.com/spreadsheets/d/1Ew0UrZvP-S74velkWSKaiSGBYcxIAoRH6IGpEzNWX6s/edit?gid=0#gid=0) to find new data sources.
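A note on parcel IDs before the processor changes below: the assessor extract identifies each parcel by a Major and a Minor number, and scrape.py (patched later in this series) concatenates them into the ten-digit parcel number that eRealProperty expects. A small worked example of that padding; the helper body mirrors get_parcel_number() in processors/scrape.py, and the second sample Major/Minor pair is made up:

    def get_parcel_number(major, minor):
        # Zero-pad Major to 6 digits and Minor to 4 digits, then concatenate.
        return str(major).rjust(6, "0") + str(minor).rjust(4, "0")

    print(get_parcel_number(524780, 1370))  # -> "5247801370", the gre-llc.py example parcel split apart
    print(get_parcel_number(98, 76))        # -> "0000980076" (hypothetical Major/Minor values)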
diff --git a/experiments/aemp.org b/experiments/aemp.org
index 674a7c8..76b826f 100755
--- a/experiments/aemp.org
+++ b/experiments/aemp.org
@@ -85,3 +85,10 @@ Can then determine which one to use
 ** TODO: Do some data cleaning to have names be the same
 eg. Seattle city of is #1 and #4 most common property owner names, should be standardized
 eg. LLC, LLP, L.L.C. etc. all to one format
+
+* 28 August
+From Dox: 610 Harvard Ave East
+Seattle, WA 98102
+Intense management malpractice, would like to know more about the above address.
+** TODO: At some point, cross-reference with registered rental data
+https://data.seattle.gov/Built-Environment/Rental-Property-Registration/j2xh-c7vt/about_data
diff --git a/experiments/gre_apartments.ods b/experiments/gre_apartments.ods
index 4f79295e08ee4b4996678d402cc94d3ac74482ae..e3c45168d38818ddc83b53dcb4bec9c758048890 100644
GIT binary patch
delta 7485
[base85-encoded binary delta omitted; spreadsheet updated, 23441 -> 23880 bytes]
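The corp_owners.py diff that follows adds on-disk caching of CCFS business-detail responses. A condensed sketch of that cache-or-fetch pattern, with the endpoint and cache path taken from the diff; error handling is stripped and the cache directory is assumed to already exist:

    import json
    import os
    import requests

    CACHE_DIR = "../data/inputs/principals_json"
    DETAILS_URL = "https://ccfs-api.prod.sos.wa.gov/api/BusinessSearch/BusinessInformation?businessID={}"

    def cached_business_details(business_id):
        cache_file = os.path.join(CACHE_DIR, f"{business_id}.json")
        if os.path.exists(cache_file):        # cache hit: reuse the saved response
            with open(cache_file) as f:
                return json.load(f)
        details = requests.get(DETAILS_URL.format(business_id)).json()
        with open(cache_file, "w") as f:      # cache miss: fetch, then save ('w', not 'r')
            json.dump(details, f)
        return details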
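Likewise, the retry loop added to _get_business_search_results() below shortens the owner name one word at a time until CCFS returns something ("GRE DOWNTOWNER LLC" -> "GRE DOWNTOWNER" -> "GRE"). The same shortening step in isolation, with the network call abstracted behind a caller-supplied search function; the reversed-string indexing in the diff is equivalent to the rsplit used here:

    def shrink_until_found(name, search):
        # Retry with progressively shorter names until `search` yields results.
        name = name.strip()
        while name:
            results = search(name)
            if results:
                return results
            if " " not in name:                    # nothing left to strip off
                return []
            name = name.rsplit(" ", 1)[0].strip()  # drop the last word and retry
        return []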
diff --git a/processors/corp_owners.py b/processors/corp_owners.py
index 07e300c..00513c6 100644
--- a/processors/corp_owners.py
+++ b/processors/corp_owners.py
@@ -1,5 +1,10 @@
 """
-    Utility functions for extracting owners.
+    Utility functions for
+    1. LookupCompaniesHelper: looking up a parcel owner in the WA Corporations and Charities Database,
+       and extracting the best search result.
+    2. GroupCompaniesHelper (WIP): given a company's stated governors and addresses,
+       grouping together addresses that likely share the same landlord.
+
 """
 
 import pandas as pd
@@ -11,9 +16,6 @@ import re
 # import geopandas as gp
 import urllib.parse
 
-# Utils for finding principals
-
-
 search_for_business_url = 'https://ccfs-api.prod.sos.wa.gov/api/BusinessSearch/GetBusinessSearchList'
 # search_for_business_url = 'https://cfda.sos.wa.gov/api/BusinessSearch/GetBusinessSearchList'
 principal_url = 'https://ccfs-api.prod.sos.wa.gov/api/BusinessSearch/GetAdvanceBusinessSearchList'
@@ -40,11 +42,30 @@
     }
 
 def get_business_details(business_id):
-    """ Get business details from the Corporation and charities filing database. """
+    """ Get business details from the Corporation and charities filing database.
+    """
     url = f"https://ccfs-api.prod.sos.wa.gov/api/BusinessSearch/BusinessInformation?businessID={business_id}"
     # url = 'https://cfda.sos.wa.gov/#/BusinessSearch/BusinessInformation?businessID={business_id}'.format(business_id=business_id)
-    r = requests.get(url)
-    return json.loads(r.text)
+    if(os.path.exists(f"../data/inputs/principals_json/{business_id}.json")):
+        # print("found json")
+        with open(f"../data/inputs/principals_json/{business_id}.json", 'r') as f:
+            return json.load(f)
+    else:
+        r = requests.get(url)
+        # Try to read the response text
+        try:
+            r_json = json.loads(r.text)
+        except:
+            r_json = {}
+
+        try:
+            # TODO: Will this write an empty string if no actual request result?
+            with open(f"../data/inputs/principals_json/{business_id}.json", 'w') as f:
+                str_json = json.dumps(r_json)
+                f.write(str_json)
+        except:
+            pass
+        return r_json
 
 
 class LookupCompaniesHelper:
@@ -53,26 +74,67 @@ class LookupCompaniesHelper:
 
     def _get_empty_df(self):
         return pd.DataFrame([], columns = ['SearchTerm', 'BusinessName', 'UBINumber', 'BusinessId',
-                                           'Address', 'Status', 'address_match', 'ubi_match', 'id_match'])
+                                           'Address', 'Status', 'address_match'])
 
-    def _get_business_search_results(self, business_name, page_num):
-        r = requests.post(search_for_business_url, get_business_search_payload(business_name, 100, page_num))
-        try:
+    def _get_business_search_results(self, business_name_orig, page_num):
+        business_name = business_name_orig.strip()
+        no_result = True
+        result = {}
+        while no_result and len(business_name) > 0:
+            print(f"searching with name {business_name}")
+            r = requests.post(search_for_business_url, get_business_search_payload(business_name, 100, page_num))
+            # TODO: add back the try-catch, but with better recovery this time
+            # Seems like it's more of a network issue than didn't find anything
+            if r.status_code == 429:
+                # TODO: Raise an error instead
+                print("This IP address has likely been blocked by CCFS, try using a VPN")
             result = json.loads(r.text)
-            #return json.loads(r.text)
-        except:
-            result = {}
+            if len(result) > 0:
+                no_result = False
+            else:
+                # Strip off the last word from the search term and try again next iteration
+                try:
+                    # Get the index of the last space in the name
+                    last_space = business_name[::-1].index(" ")
+                    business_name = business_name[: -1 - last_space].strip()
+                except ValueError:
+                    # TODO: In this case, try with the LastBuyer instead of ListedOwner? Upstream
+                    print(f"Found no business with name {business_name_orig}\n")
+                    business_name = ""
+
+        return result
 
     def _extract_search_results(self, search_term, search_req_response):
-        res_list = [[search_term, res['BusinessName'], res['UBINumber'], res['BusinessID'],
-                     res['PrincipalOffice']['PrincipalStreetAddress']['FullAddress'], res["BusinessStatus"]]
-                    for res in search_req_response]
-        res_df = pd.DataFrame(res_list, columns=['SearchTerm', 'BusinessName', 'UBINumber', 'BusinessId', 'Address', "Status"])
-        # Basically keep a list of exact matches, and build a list of potential matches that we give to human verifiers
+        # TODO: If no results, return a row with the search term and nans for everything else
+        res_list = []
+        for res in search_req_response:
+            # build up the known responses
+            # get more business data from that id
+            business_info = get_business_details(res["BusinessID"])
+            res_list += [[search_term.strip(),
+                          res.get('BusinessName').strip(),
+                          res.get('UBINumber'),
+                          res.get('BusinessID'),
+                          res.get('PrincipalOffice')['PrincipalStreetAddress']['FullAddress'],
+                          res.get("BusinessStatus"),
+                          business_info.get("BINAICSCodeDesc", "NOT_FOUND")]]
+        # return an empty row if no search results
+        if len(search_req_response) == 0:
+            res_list += [[search_term, "NOT_FOUND", "NOT_FOUND", "NOT_FOUND", "NOT_FOUND", "NOT_FOUND", "NOT_FOUND"]]
+
+        res_df = pd.DataFrame(res_list, columns=['SearchTerm', 'BusinessName', 'UBINumber', 'BusinessId', 'Address', "Status", "BusinessNature"])
+
+        # Clean some of the results a bit more:
+        # Keep only active companies and searches that yielded no results
+        res_df = res_df[(res_df["Status"]=="Active") | (res_df["Status"]=="NOT_FOUND")]
+        # TODO: Maybe keep only real estate / property investments?
 
+        # Keep a list of exact matches, or later build a list of potential matches that we give to human verifiers
         exact_match = res_df.index[res_df['BusinessName'] == search_term].tolist()
         if exact_match:
             res_df = pd.concat([res_df.iloc[[exact_match[0]],:], res_df.drop(exact_match[0], axis=0)], axis=0)
+
         return res_df
 
     def _determine_search_matches(self, search_results_df):
@@ -82,24 +144,21 @@ class LookupCompaniesHelper:
         and result have the same address. Could add search terms as a subset for duplicated call
         """
         search_results_df['address_match'] = search_results_df.duplicated(subset=['Address'], keep=False)
-        search_results_df['ubi_match'] = search_results_df.duplicated(subset=['UBINumber'], keep=False)
-        search_results_df['id_match'] = search_results_df.duplicated(subset=['BusinessId'], keep=False)
 
     def _get_all_company_name_match_search_results(self, owner_name):
         n = 1
         res_length = 100
         search_results = []
 
-        while res_length == 100:
-            res = self._get_business_search_results(owner_name, n)
-            search_results += (res)
-            n += 1
-            res_length = len(res)
+        # while res_length == 100:
+        res = self._get_business_search_results(owner_name, n)
+        # search_results += (res)
+        # n += 1
+        # res_length = len(res)
 
-        return search_results
+        return res
 
     """
-    TODO: Remove the ubi and address match, this does nothing to help
     """
     def _get_potential_company_name_matches(self, owner_name):
        all_search_results = self._get_all_company_name_match_search_results(owner_name)
@@ -111,40 +170,59 @@ class LookupCompaniesHelper:
     """
     utils to separate search results into exact match, potential match (where no exact match was found),
     and additional matches (extra matches if there was an exact match and additional matches)
+    TODO: Give more robust answers here!
+    Other abbreviations include:
+    - Apartment: APTS -> Apartments
+    - Partnership
+    - etc.
     """
     def is_exact_match(row):
         """ Extract exact matches, including some regex magic. """
         search = row["SearchTerm"]
         result = row["BusinessName"]
-
+
         # examples: LLC, LLP, L L C, L.L.C., L.L.C. L.L.P., L.L.P, LLC.
         # Limited Partnership, Limited liability company
         p = re.compile("L[\s.]?L[\s,.]?[PC][.]" ,flags=re.IGNORECASE)
-        result=result.replace(",", "")
+
+        replace_map = {
+            ",": "",
+            "LIMITED LIABILITY COMPANY":"LLC",
+            "LIMITED PARTNERSHIP": "LLC",
+            "APARTMENTS": "APTS",
+            "LTD PS": "LLC",
+            "LTD PARTNERSHIP": "LLC",
+        }
+
         result= re.sub(p, "LLC", result)
-        result=result.replace("LIMITED LIABILITY COMPANY", "LLC")
-        result=result.replace("LIMITED PARTNERSHIP", "LLC")
-
-        search=search.replace(",", "")
         search=re.sub(p, "LLC", search)
-        search=search.replace("LIMITED PARTNERSHIP", "LLC")
-        search=search.replace("LIMITED LIABILITY COMPANY", "LLC")
+
+        for k,v in replace_map.items():
+            result = result.replace(k, v)
+            search = search.replace(k, v)
+
+        # result=result.replace(",", "")
+        # result=result.replace("LIMITED LIABILITY COMPANY", "LLC")
+        # result=result.replace("LIMITED PARTNERSHIP", "LLC")
+
+        # search=search.replace(",", "")
+        # search=search.replace("LIMITED PARTNERSHIP", "LLC")
+        # search=search.replace("LIMITED LIABILITY COMPANY", "LLC")
 
         return search == result
 
     exact_matches = self._get_empty_df()
-    exact_matches.columns
     potential_matches = self._get_empty_df()
-    additional_matches = self._get_empty_df()
+    # additional_matches = self._get_empty_df()
 
     exact_match = results[results.apply(lambda row: is_exact_match(row), axis=1)]
-    if len(exact_match) > 0:
-        exact_matches = pd.concat([exact_matches, exact_match], ignore_index=True)
-        additional_matches = pd.concat([additional_matches, results[results['SearchTerm'] != results['BusinessName']]], ignore_index=True)
+    # TODO: If going to do len(results) check, then need to filter by business nature sooner
+    # Len results heuristic doesn't work for empty searches, or the recursive search
+    if len(exact_match) > 0: #or len(results) == 1:
+        exact_matches = pd.DataFrame(results.iloc[0]).T
    else:
        potential_matches = pd.concat([potential_matches, results], ignore_index=True)
 
-    return exact_matches, potential_matches, additional_matches
+    return exact_matches, potential_matches
 
    def get_company_list_name_matches(self, owner_list: list):
        """
        owner_list: a list of owner names that will be searched in the CCFS database for matches.
        Exact_matches: when search term exactly matches a result in CCFS database.
        Potential_matches: when search term doesn't exactly match, there needs to be some human verification here to determine.
-       Additional_matches: extraneous matches in case potential_matches didn't yield enough results.
""" exact_matches = self._get_empty_df() potential_matches = self._get_empty_df() - additional_matches = self._get_empty_df() + # TODO: Instead of additional matches, make a df for "no matches" for owner in owner_list: + owner = owner.strip() # Clean owner name slightly matches = self._get_potential_company_name_matches(owner) - temp_exact, temp_potential, temp_add = self._separate_search_results(matches) + temp_exact, temp_potential = self._separate_search_results(matches) exact_matches = pd.concat([temp_exact, exact_matches], ignore_index=True) potential_matches = pd.concat([temp_potential, potential_matches], ignore_index=True) - additional_matches = pd.concat([temp_add, additional_matches], ignore_index=True) - return exact_matches, potential_matches, additional_matches + # additional_matches = pd.concat([temp_add, additional_matches], ignore_index=True) + return exact_matches, potential_matches def get_company_matches_and_export(self, owner_list: list, x: int): @@ -261,7 +339,7 @@ class GroupCompaniesHelper: return principals business_ids = [res['BusinessID'] for res in search_results] business_names = [res['BusinessName'] for res in search_results] - ubi_nums = [res['UBINumber'] for res in search_results] + # ubi_nums = [res['UBINumber'] for res in search_results] for id, name in zip(business_ids, business_names): business_json = get_business_details(id) diff --git a/processors/gre-llc.py b/processors/gre-llc.py index 509c4d2..1e7638f 100644 --- a/processors/gre-llc.py +++ b/processors/gre-llc.py @@ -5,44 +5,36 @@ Created on Fri Aug 15 19:06:45 2025 @author: linnea +Script to + Address: 308 4th Ave S, Seattle, WA, 98104 ParcelNumber: 5247801370 ListedOwner: GRE DOWNTOWNER LLC PreviousBuyer: CENTRAL PUGET SOUND REGIONAL TRASNSIT AUTHORITY - - + GRE List: https://goodmanre.com/our-projects/ TODO: - Make a flag that shows if the buywer / owner are similar - - Check the fuzzy wuzzy matching in utils - Get the address field from CCFS, put in corp_owners - If the previous buyer doesn't make sense, get the year of the last buying to see if it's at all recent for sanity checks - -1. Load in the whole dataframe of owners and buyers -2. Get the whole list of responses for the listed owner - - This shows all the companies that match the listed owner in assessor data - - Need to find the most likely company in CCFS to match the listed owner -3. Make a df out of? -4. """ from corp_owners import LookupCompaniesHelper, GroupCompaniesHelper import pandas as pd lookup_helper = LookupCompaniesHelper(("../data/intermediates")) -df = pd.read_csv("../data/intermediates/owners_listed.csv") -# Almost never need additional matches, as it's only populated if there's an exact match -exact, potential, additional = lookup_helper.get_company_list_name_matches(["GRE DOWNTOWNER LLC"]) -owner_names = df["ListedOwner"].unique() -# exact, potential, additional = lookup_helper.get_company_list_name_matches(owner_names[:10]) +# Option 1: Uncomment the two lines to run the full script. 
+# df = pd.read_csv("../data/intermediates/owners_listed.csv")
+# owner_names = df["ListedOwner"].unique()
+
+# Option 2: Leave these lines uncommented to run with a specific subset for debugging
+df = pd.read_excel("../experiments/gre_apartments.ods", engine='odf')
+df = df.iloc[1]
+owner_names = [df["ListedOwner"]]
+
+exact, potential = lookup_helper.get_company_list_name_matches(owner_names)

-if(len(exact) >= 1):
-    ubi = exact.loc[0, "UBINumber"]
-
-group_helper= GroupCompaniesHelper("../data/intermediates", "principals")
-# TODO: Figure out how to format the url for proper response
-res_group = group_helper.get_companies_principals(exact)
diff --git a/processors/merge.py b/processors/merge.py
index 50302e0..8cce5ba 100644
--- a/processors/merge.py
+++ b/processors/merge.py
@@ -2,9 +2,12 @@
 # -*- coding: utf-8 -*-
 """
 Created on Tue Aug 12 18:17:47 2025
-
 @author: linnea
 
+One-time script for cleaning up parcel lookup data.
+If everything went 100% smoothly in scrape.py (i.e. no search results came back empty),
+then this script shouldn't be needed.
+
 1. Load intermediate results
 2. Load original data with unmodified parcelid
 3. Add a taxparcelid to unmodified so can merge
diff --git a/processors/parcel_owners.py b/processors/parcel_owners.py
index e1663e5..a82d1b5 100644
--- a/processors/parcel_owners.py
+++ b/processors/parcel_owners.py
@@ -1,3 +1,8 @@
+"""
+    Utils for finding a parcel owner given an address.
+    Data source is King County Assessor.
+"""
+
 import pandas as pd
 from bs4 import BeautifulSoup
 import requests
@@ -43,13 +48,6 @@ class ParcelLookupHelper:
         if data_not_found:
             return None
         return html_soup
-
-    # TODO: Maybe include sales history AND current owner?
-    # Example: 308 4TH AVE S 98104
-    # Current owner = GRE DOWNTOWNER LLC
-    # Sales history = CENTRAL PUGET SOUND REGIONAL TRASNSIT AUTHORITY
-    # Website also shows GRE DOWNTOWNER https://www.addisononfourth.com/
-    # TODO: cache the whole soup object so can lookup later?
 
     def _get_owner_name_from_soup(self, soup: object):
         """
         Extract the owner name from a given BeautifulSoup object, `soup`, of a Property Detail page.
@@ -60,34 +59,8 @@ class ParcelLookupHelper:
         parent = title.parent
         next_tr = title and parent.find_next('tr')
         table = next_tr and next_tr.table
         return table and table.find_all('td')[5].text
-
-    def _get_num_units_and_types_from_soup(self, soup: object):
-        """
-        Given a BeautifulSoup object, `soup`, of a Property Detail page, extract:
-            - the number of units in the building
-            - the unit types
-            - the sq ft of each unit type
-            - number of bed/bath rooms in each unit type
-        """
-        title = soup.find('span', text = 'Unit Breakdown')
-        if not title:
-            return { 'numUnits': 'NOT_FOUND', 'unitDetails': 'NOT_FOUND' }
-
-        table = title and title.find_next('div').table
-        table_rows = table and table.find_all('tr')[1:]
-        cells = table_rows and [row.find_all('td') for row in table_rows]
-        table_data = []
-
-        for c in cells:
-            table_data.append([span.text for span in c])
-        total_units = sum([int(row[1]) for row in table_data])
-        dict_keys = ['type', 'number', 'sqft', 'bed', 'bath']
-        units = [dict(zip(dict_keys, row)) for row in table_data]
-        return { 'numUnits': total_units, 'unitDetails': units }
-
-    # TODO: pass maybe a list of features want to extract?
 
     def _scrape_parcel_owners(self, tax_parcel_id_numbers: list, file_name: str):
@@ -110,6 +83,10 @@ class ParcelLookupHelper:
         self._write_parcel_owner_csv(parcel_df, file_name)
 
     def _save_html(self, soup, id):
+        """
+        Given a 'soup' type response for an address lookup, save it
+        as an HTML file for future lookups.
+        """
         table = soup.find("table", attrs={"class":"_table2", "id":"TABLE1"})
         with open(f"{self.output_path}/html/{id}.html", 'w') as f:
             f.write(str(table))
@@ -122,7 +99,6 @@ class ParcelLookupHelper:
             return "NOT FOUND"
         else:
             return self._get_owner_name_from_soup(parcel_soup)
-        # parcel_df.loc[len(parcel_df.index)] = [id, owner_name]
 
     def _scrape_parcel_owners_and_unit_details(self, tax_parcel_id_numbers: list, file_name: str):
diff --git a/processors/scrape.py b/processors/scrape.py
index 8ed090b..cd8e2e6 100644
--- a/processors/scrape.py
+++ b/processors/scrape.py
@@ -1,13 +1,15 @@
+"""
+Script for getting all apartment addresses in King County,
+and looking up their parcel owners in KC Assessor.
+Only runs in set increments to avoid being blocked by the Assessor site.
+"""
 import pandas as pd
-from selenium import webdriver
-from selenium.webdriver.common.by import By
-import time
 import os
 from parcel_owners import ParcelLookupHelper
 
-incr = 998
+incr = 998 # Number of addresses to look up per run.
 
-class ParcelScraper:
+class ApartmentDataLoader:
     def __init__(self, path):
         self.path = path # path to the csv
         # self.driver = self.load_driver()
@@ -35,31 +37,6 @@ class ApartmentDataLoader:
             if idx % 50 == 0:
                 print(f"Saving row {idx}")
                 self.df.to_csv("apartments_with_owners.csv")
-
-    def submit_parcel(self, parcel):
-        self.driver.get("https://blue.kingcounty.com/Assessor/eRealProperty/default.aspx")
-        print(f"https://blue.kingcounty.com/Assessor/eRealProperty/Dashboard.aspx?ParcelNbr={parcel}")
-        self.driver.get(f"https://blue.kingcounty.com/Assessor/eRealProperty/Dashboard.aspx?ParcelNbr={parcel}")
-        parcel_name = ""
-        try:
-            # parcel_form = self.driver.find_element(By.ID, "cphContent_txtParcelNbr")
-            # parcel_form.send_keys(parcel)
-
-            # search_box = self.driver.find_element(By.NAME, "kingcounty_gov$cphContent$btn_Search")
-            # search_box.click()
-
-            # Wait until the table view has loaded
-            # table_loaded = self.driver.find_element(By.ID, "topnavlistbtn")
-            # wait = WebDriverWait(self.driver, timeout=5)
-            # wait.until(lambda _: table_loaded.is_displayed())
-
-            name = self.driver.find_element(By.XPATH, "/html/body/form/table/tbody/tr/td[2]/table/tbody/tr[2]/td[1]/table/tbody/tr[2]/td/div/table/tbody/tr[2]/td[2]")
-            parcel_name = name.text
-            # print(name.text)
-        except:
-            print(f"Couldn't find parcel name for parcel number {parcel}")
-
-        return parcel_name
 
     def get_parcel_number(self, major, minor):
         return str(major).rjust(6, "0") + str(minor).rjust(4,"0")
@@ -73,10 +50,9 @@ if __name__ == "__main__":
         pass
 
     print(f"starting at index {nrows}")
-
-    scraper = ParcelScraper("EXTR_AptComplex.csv")
-    df = scraper.df.loc[nrows:nrows + incr]
+    loader = ApartmentDataLoader("EXTR_AptComplex.csv")
+    df = loader.df.loc[nrows:nrows + incr]
 
     parcelHelper = ParcelLookupHelper(os.getcwd(), True)
     parcelHelper.scrape_parcel_owners(df["ParcelNumber"], f"raw/owners_{nrows // incr}", False)
diff --git a/requirements-conda.txt b/requirements-conda.txt
new file mode 100644
index 0000000..95900eb
--- /dev/null
+++ b/requirements-conda.txt
@@ -0,0 +1,69 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+# created-by: conda 25.5.1
+_libgcc_mutex=0.1=main
+_openmp_mutex=5.1=1_gnu
+beautifulsoup4=4.13.5=py313h06a4308_0
+blas=1.0=mkl
+bottleneck=1.4.2=py313hf0014fa_0
+brotlicffi=1.0.9.2=py313h6a678d5_1
+bs4=4.13.5=py39hd3eb1b0_0
+bzip2=1.0.8=h5eee18b_6
+ca-certificates=2025.9.9=h06a4308_0
+certifi=2025.8.3=py313h06a4308_0
+cffi=1.17.1=py313h1fdaa30_1
+charset-normalizer=3.3.2=pyhd3eb1b0_0
+defusedxml=0.7.1=pyhd3eb1b0_0
+expat=2.7.1=h6a678d5_0
+idna=3.7=py313h06a4308_0
+intel-openmp=2025.0.0=h06a4308_1171
+ld_impl_linux-64=2.40=h12ee557_0
+libffi=3.4.4=h6a678d5_1
+libgcc-ng=11.2.0=h1234567_1
+libgomp=11.2.0=h1234567_1
+libmpdec=4.0.0=h5eee18b_0
+libstdcxx-ng=11.2.0=h1234567_1
+libuuid=1.41.5=h5eee18b_0
+libxcb=1.17.0=h9b100fa_0
+libzlib=1.3.1=hb25bd0a_0
+mkl=2025.0.0=hacee8c2_941
+mkl-service=2.4.0=py313h5eee18b_3
+mkl_fft=1.3.11=py313hacdc0fc_1
+mkl_random=1.2.8=py313h8928b4f_1
+ncurses=6.5=h7934f7d_0
+numexpr=2.11.0=py313h41d4191_1
+numpy=2.3.3=py313h720eef7_0
+numpy-base=2.3.3=py313h95072fd_0
+odfpy=1.4.1=pyhd8ed1ab_1
+openssl=3.0.17=h5eee18b_0
+pandas=2.3.2=py313h280b501_0
+pip=25.2=pyhc872135_0
+pthread-stubs=0.3=h0ce48e5_1
+pycparser=2.23=py313h06a4308_0
+pysocks=1.7.1=py313h06a4308_0
+python=3.13.7=h7e8bc2b_100_cp313
+python-dateutil=2.9.0post0=py313h06a4308_2
+python-tzdata=2025.2=pyhd3eb1b0_0
+python_abi=3.13=1_cp313
+pytz=2025.2=py313h06a4308_0
+readline=8.3=hc2a1206_0
+requests=2.32.5=py313h06a4308_0
+setuptools=72.1.0=py313h06a4308_0
+six=1.17.0=py313h06a4308_0
+soupsieve=2.5=py313h06a4308_0
+sqlite=3.50.2=hb25bd0a_1
+tbb=2022.0.0=hdb19cb5_0
+tbb-devel=2022.0.0=hdb19cb5_0
+tk=8.6.15=h54e0aa7_0
+typing-extensions=4.15.0=py313h06a4308_0
+typing_extensions=4.15.0=py313h06a4308_0
+tzdata=2025b=h04d1e81_0
+urllib3=2.5.0=py313h06a4308_0
+wheel=0.45.1=py313h06a4308_0
+xorg-libx11=1.8.12=h9b100fa_1
+xorg-libxau=1.0.12=h9b100fa_0
+xorg-libxdmcp=1.1.5=h9b100fa_0
+xorg-xorgproto=2024.1=h5eee18b_1
+xz=5.6.4=h5eee18b_1
+zlib=1.3.1=hb25bd0a_0
-- 
2.49.0

From 93b81bfb00c4139e306d4c826630224ac601a1a4 Mon Sep 17 00:00:00 2001
From: Linnea
Date: Mon, 22 Sep 2025 20:22:03 -0700
Subject: [PATCH 2/4] add pip list

---
 requirements.txt | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..36e00ca
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,28 @@
+beautifulsoup4==4.13.5
+Bottleneck==1.4.2
+brotlicffi==1.0.9.2
+certifi==2025.8.3
+cffi==1.17.1
+charset-normalizer==3.3.2
+defusedxml==0.7.1
+idna==3.7
+mkl_fft==1.3.11
+mkl_random==1.2.8
+mkl-service==2.4.0
+numexpr==2.11.0
+numpy==2.3.3
+odfpy==1.4.1
+pandas==2.3.2
+pip==25.2
+pycparser==2.23
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+pytz==2025.2
+requests==2.32.5
+setuptools==72.1.0
+six==1.17.0
+soupsieve==2.5
+typing_extensions==4.15.0
+tzdata==2025.2
+urllib3==2.5.0
+wheel==0.45.1
-- 
2.49.0

From c82df4b0fb88109db5213c8fab5278f5faa8c299 Mon Sep 17 00:00:00 2001
From: Linnea
Date: Thu, 25 Sep 2025 14:26:45 -0700
Subject: [PATCH 3/4] Confirm minio access

---
 README.md           | 2 +-
 lib/minio_helper.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index e12722b..0e3373a 100644
--- a/README.md
+++ b/README.md
@@ -35,4 +35,4 @@ Use `lib/minio_helper.py` to extend the functionality
 Run `test_minio` in `lib/main.py` to test out that it works
 (TODO: move this to own testing script, perhaps unit tests)
 
-Note: You will need to have access_key and secret_key in your env before running for this to work, contact @linnealovespie or @ammaratef45 to obtain these keys)
+Note: You will need to have minio_access_key and minio_secret_key in your env before running for this to work; contact @linnealovespie or @ammaratef45 to obtain these keys.
diff --git a/lib/minio_helper.py b/lib/minio_helper.py
index ba6392b..8d3c447 100644
--- a/lib/minio_helper.py
+++ b/lib/minio_helper.py
@@ -7,8 +7,8 @@ class MinioHelper:
     def __init__(self, bucket_name: str):
         self.client = Minio(
             "minio.radmin.live",
-            access_key=os.environ['access_key'],
-            secret_key=os.environ['secret_key']
+            access_key=os.environ['minio_access_key'],
+            secret_key=os.environ['minio_secret_key']
         )
         self.bucket_name = bucket_name
-- 
2.49.0

From fae15e05b131947a39e85fed22a921e5ea83568 Mon Sep 17 00:00:00 2001
From: Linnea
Date: Thu, 25 Sep 2025 14:39:15 -0700
Subject: [PATCH 4/4] clean up comments and todos

---
 processors/corp_owners.py | 39 ++++++++++++---------------------------
 processors/gre-llc.py     |  7 +++++--
 processors/merge.py       |  3 ---
 3 files changed, 17 insertions(+), 32 deletions(-)

diff --git a/processors/corp_owners.py b/processors/corp_owners.py
index 00513c6..56f40b8 100644
--- a/processors/corp_owners.py
+++ b/processors/corp_owners.py
@@ -13,10 +13,10 @@ import requests
 import json
 import os
 import re
-# import geopandas as gp
 import urllib.parse
 
 search_for_business_url = 'https://ccfs-api.prod.sos.wa.gov/api/BusinessSearch/GetBusinessSearchList'
+# Old search URL, kept in case the one above gets blocked
 # search_for_business_url = 'https://cfda.sos.wa.gov/api/BusinessSearch/GetBusinessSearchList'
 principal_url = 'https://ccfs-api.prod.sos.wa.gov/api/BusinessSearch/GetAdvanceBusinessSearchList'
 
@@ -45,9 +45,9 @@ def get_business_details(business_id):
     """ Get business details from the Corporation and charities filing database.
     """
     url = f"https://ccfs-api.prod.sos.wa.gov/api/BusinessSearch/BusinessInformation?businessID={business_id}"
+    # Old search URL, kept in case the one above gets blocked
     # url = 'https://cfda.sos.wa.gov/#/BusinessSearch/BusinessInformation?businessID={business_id}'.format(business_id=business_id)
     if(os.path.exists(f"../data/inputs/principals_json/{business_id}.json")):
-        # print("found json")
         with open(f"../data/inputs/principals_json/{business_id}.json", 'r') as f:
             return json.load(f)
     else:
         r = requests.get(url)
@@ -83,10 +83,9 @@ class LookupCompaniesHelper:
         while no_result and len(business_name) > 0:
             print(f"searching with name {business_name}")
             r = requests.post(search_for_business_url, get_business_search_payload(business_name, 100, page_num))
-            # TODO: add back the try-catch, but with better recovery this time
-            # Seems like it's more of a network issue than didn't find anything
+            # TODO: add some more error handling in case of connectivity issues.
             if r.status_code == 429:
-                # TODO: Raise an error instead
+                # TODO: Raise an error
                 print("This IP address has likely been blocked by CCFS, try using a VPN")
             result = json.loads(r.text)
@@ -98,7 +97,7 @@ class LookupCompaniesHelper:
                     last_space = business_name[::-1].index(" ")
                     business_name = business_name[: -1 - last_space].strip()
                 except ValueError:
-                    # TODO: In this case, try with the LastBuyer instead of ListedOwner? Upstream
+                    # TODO: In this case, try with the LastBuyer instead of ListedOwner?
print(f"Found no business with name {business_name_orig}\n") business_name = "" @@ -106,7 +105,6 @@ class LookupCompaniesHelper: return result def _extract_search_results(self, search_term, search_req_response): - # TODO: If no results, return a row with the search term and nans for everything else res_list = [] for res in search_req_response: # build up the known responses @@ -128,9 +126,12 @@ class LookupCompaniesHelper: # Clean some of the results a bit more: # Keep only active companies and searches that yielded no results res_df = res_df[(res_df["Status"]=="Active") | (res_df["Status"]=="NOT_FOUND")] - # TODO: Maybe keep only real estate / property investments? + + # TODO: Maybe add a filter on BusinessNature for only real estate/ property investments + # TODO: First need to get an idea of all the BusinessNature types # Keep a list of exact matches, or later build a list of potential matches that we give to human verifiers + # This check is very simple heuristic and more robust matching will occur later in processing exact_match = res_df.index[res_df['BusinessName'] == search_term].tolist() if exact_match: res_df = pd.concat([res_df.iloc[[exact_match[0]],:], res_df.drop(exact_match[0], axis=0)], axis=0) @@ -150,12 +151,7 @@ class LookupCompaniesHelper: res_length = 100 search_results = [] - # while res_length == 100: - res = self._get_business_search_results(owner_name, n) - # search_results += (res) - # n += 1 - # res_length = len(res) - + res = self._get_business_search_results(owner_name, n) return res """ @@ -199,20 +195,11 @@ class LookupCompaniesHelper: for k,v in replace_map.items(): result = result.replace(k, v) search = search.replace(k, v) - - # result=result.replace(",", "") - # result=result.replace("LIMITED LIABILITY COMPANY", "LLC") - # result=result.replace("LIMITED PARTNERSHIP", "LLC") - - # search=search.replace(",", "") - # search=search.replace("LIMITED PARTNERSHIP", "LLC") - # search=search.replace("LIMITED LIABILITY COMPANY", "LLC") return search == result exact_matches = self._get_empty_df() potential_matches = self._get_empty_df() - # additional_matches = self._get_empty_df() exact_match = results[results.apply(lambda row: is_exact_match(row), axis=1)] # TODO: If going to do len(results) check, then need to filter by business nature sooner @@ -233,7 +220,7 @@ class LookupCompaniesHelper: """ exact_matches = self._get_empty_df() potential_matches = self._get_empty_df() - # TODO: Instead of additional matches, make a df for "no matches" + # TODO: Make a df for search terms with no matches and how to make it mesh well with recursive search for owner in owner_list: owner = owner.strip() # Clean owner name slightly @@ -241,7 +228,6 @@ class LookupCompaniesHelper: temp_exact, temp_potential = self._separate_search_results(matches) exact_matches = pd.concat([temp_exact, exact_matches], ignore_index=True) potential_matches = pd.concat([temp_potential, potential_matches], ignore_index=True) - # additional_matches = pd.concat([temp_add, additional_matches], ignore_index=True) return exact_matches, potential_matches @@ -251,11 +237,10 @@ class LookupCompaniesHelper: match CSV's in the folder determined by `output_path` """ print(f"Saving output files to {self.output_path}") - exact_matches, potential_matches, additional_matches = self.get_company_list_name_matches(owner_list) + exact_matches, potential_matches = self.get_company_list_name_matches(owner_list) exact_matches.to_csv(f'{self.output_path}/exact_matches_{x}.csv') 
        potential_matches.to_csv(f'{self.output_path}/potential_matches_{x}.csv')
-       additional_matches.to_csv(f'{self.output_path}/additional_matches_{x}.csv')
 
 class GroupCompaniesHelper:
     def __init__(self, out_path: str, out_name: str):
diff --git a/processors/gre-llc.py b/processors/gre-llc.py
index 1e7638f..52989f0 100644
--- a/processors/gre-llc.py
+++ b/processors/gre-llc.py
@@ -5,13 +5,16 @@
Created on Fri Aug 15 19:06:45 2025

@author: linnea

-Script to
+Script to find exact and potential search results for a parcel owner in the CCFS database
+A representative example for the parcel owner (assessor) data scraping step
 
 Address: 308 4th Ave S, Seattle, WA, 98104
 ParcelNumber: 5247801370
 ListedOwner: GRE DOWNTOWNER LLC
 PreviousBuyer: CENTRAL PUGET SOUND REGIONAL TRASNSIT AUTHORITY
-
+
+We happen to already know the answer,
+which is that this address is part of Goodman Real Estate's extensive portfolio.
 GRE List: https://goodmanre.com/our-projects/
 
 TODO:
diff --git a/processors/merge.py b/processors/merge.py
index 8cce5ba..ef619a0 100644
--- a/processors/merge.py
+++ b/processors/merge.py
@@ -69,9 +69,7 @@ if __name__ == "__main__":
     # Add address from df_apts to df_raw
     df_join = df_apts.merge(df_raw, 'left', on="ParcelNumber")
     df_join["ListedOwner"] = "NOT_FOUND"
-    # df_join["ListedOwner"] = df_join.apply(lambda row: get_listed_owner(row), axis=1)
     for idx, row in df_join.iterrows():
-        # df_join.loc[idx, "ListedOwner"] = get_listed_owner(row)
         row.ListedOwner = get_listed_owner(row)
         df_join.loc[idx] = row
         if idx % 500 == 0:
@@ -79,7 +77,6 @@ if __name__ == "__main__":
     df_join.to_csv(f"{intermediates_path}/owners_listed.csv")
 
     df_join.to_csv(f"{intermediates_path}/owners_listed.csv")
-    # df_join = df_join.rename(columns={"Owner":"RecentBuyer"})
-- 
2.49.0
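Taken together, patches 1 and 4 leave is_exact_match() normalizing both the search term and each CCFS result before comparing them. The same rewriting in isolation, with the regex and replace_map copied from corp_owners.py; the owner name in the example is made up:

    import re

    p = re.compile(r"L[\s.]?L[\s,.]?[PC][.]", flags=re.IGNORECASE)
    replace_map = {
        ",": "",
        "LIMITED LIABILITY COMPANY": "LLC",
        "LIMITED PARTNERSHIP": "LLC",
        "APARTMENTS": "APTS",
        "LTD PS": "LLC",
        "LTD PARTNERSHIP": "LLC",
    }

    def normalize(name):
        # Collapse the dotted/spaced LLC and LLP spellings first, then apply the literal rewrites.
        name = re.sub(p, "LLC", name)
        for k, v in replace_map.items():
            name = name.replace(k, v)
        return name

    # "EVERGREEN PROPERTIES, L.L.C." -> "EVERGREEN PROPERTIES LLC", so the pair counts as an exact match.
    assert normalize("EVERGREEN PROPERTIES, L.L.C.") == normalize("EVERGREEN PROPERTIES LLC")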