From 672c474fb8af286cfe08b7df5e32d86b35168ce8 Mon Sep 17 00:00:00 2001
From: jessib
Date: Mon, 29 Dec 2025 20:18:53 -0800
Subject: [PATCH 1/3] draft corporate name improvements.

---
 processors/corp_owners.py | 63 +++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 22 deletions(-)

diff --git a/processors/corp_owners.py b/processors/corp_owners.py
index 56f40b8..83b7509 100644
--- a/processors/corp_owners.py
+++ b/processors/corp_owners.py
@@ -171,33 +171,52 @@ class LookupCompaniesHelper:
             - Partnership
             - etc.
         """
+        def normalize_name(term)
+            # pad with a space on both ends so whole words can be matched with plain str.replace (stripped before returning)
+            term = " " + term.upper() + " "
+            term = re.sub(r"\s+", " ", term)
+
+            # examples: LLC, LLP, L L C, L.L.C., L.L.C. L.L.P., L.L.P, LLC.
+            # The pattern consumes the space on each side, so the replacement puts them back.
+            p = re.compile(" L[\s.]?L[\s,.]?[PC][.]? ")
+            term = re.sub(p, " LLC ", term)
+
+            term = term.replace(",", "")
+
+            word_replace_map = {
+                "LIMITED LIABILITY COMPANY": "LLC",
+                "LIMITED PARTNERSHIP": "LLC",
+                "APARTMENTS": "APTS",
+                "LTD PS": "LLC",
+                "LTD PARTNERSHIP": "LLC",
+                "ST": "STREET",
+                "AVE": "AVENUE",
+                "BLVD": "BOULEVARD",
+                "PRPTS": "PROPERTIES",
+                "PPTY": "PROPERTY",
+                "BLDG": "BUILDING",
+                "HLDGS": "HOLDINGS",
+                "GRP": "GROUP",
+                "INVSTMNTS": "INVESTMENTS",
+                "FMLY": "FAMILY",
+                "CO": "COMPANY",
+                "CORP": "CORPORATION",
+                "&": "AND",
+                "APT": "APARTMENT",
+                "APTS": "APARTMENTS",
+            }
+
+            for k,v in word_replace_map.items():
+                term = term.replace(" " + k + " ", " " + v + " ")
+
+            return term.strip()
+
         def is_exact_match(row):
             """
             Extract exact matches, including some regex magic.
             """
             search = row["SearchTerm"]
             result = row["BusinessName"]
-
-            # examples: LLC, LLP, L L C, L.L.C., L.L.C. L.L.P., L.L.P, LLC.
-            # Limited Partnership, Limited liability company
-            p = re.compile("L[\s.]?L[\s,.]?[PC][.]" ,flags=re.IGNORECASE)
-
-            replace_map = {
-                ",": "",
-                "LIMITED LIABILITY COMPANY":"LLC",
-                "LIMITED PARTNERSHIP": "LLC",
-                "APARTMENTS": "APTS",
-                "LTD PS": "LLC",
-                "LTD PARTNERSHIP": "LLC",
-            }
-
-            result= re.sub(p, "LLC", result)
-            search=re.sub(p, "LLC", search)
+            return normalize_name(search) == normalize_name(result)
-            for k,v in replace_map.items():
-                result = result.replace(k, v)
-                search = search.replace(k, v)
-
-            return search == result
-
         exact_matches = self._get_empty_df()
         potential_matches = self._get_empty_df()
-- 
2.49.0

From 900c051800d27f9313d446b870cdb11d9703b8fe Mon Sep 17 00:00:00 2001
From: jessib
Date: Mon, 5 Jan 2026 21:19:38 -0800
Subject: [PATCH 2/3] typo

---
 processors/corp_owners.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/processors/corp_owners.py b/processors/corp_owners.py
index 83b7509..dc48850 100644
--- a/processors/corp_owners.py
+++ b/processors/corp_owners.py
@@ -171,7 +171,7 @@ class LookupCompaniesHelper:
             - Partnership
             - etc.
         """
-        def normalize_name(term)
+        def normalize_name(term):
             # pad with a space on both ends so whole words can be matched with plain str.replace (stripped before returning)
             term = " " + term.upper() + " "
             term = re.sub(r"\s+", " ", term)
-- 
2.49.0

From 2a97e1ccb7eb26a956916450937befcc3e03528d Mon Sep 17 00:00:00 2001
From: jessib
Date: Tue, 6 Jan 2026 16:34:54 -0800
Subject: [PATCH 3/3] escape regexp pattern

---
 processors/corp_owners.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/processors/corp_owners.py b/processors/corp_owners.py
index dc48850..684a2d9 100644
--- a/processors/corp_owners.py
+++ b/processors/corp_owners.py
@@ -178,7 +178,7 @@ class LookupCompaniesHelper:
 
             # examples: LLC, LLP, L L C, L.L.C., L.L.C. L.L.P., L.L.P, LLC.
             # The pattern consumes the space on each side, so the replacement puts them back.
-            p = re.compile(" L[\s.]?L[\s,.]?[PC][.]? ")
+            p = re.compile(r" L[\s.]?L[\s,.]?[PC][.]? ")
             term = re.sub(p, " LLC ", term)
 
             term = term.replace(",", "")
-- 
2.49.0