WIP: draft corporate name improvements. #16

Draft
jessib wants to merge 4 commits from biz-name-jb-8 into main

View File

@ -106,33 +106,52 @@ class LookupCompaniesHelper:
- Partnership
- etc.
"""
def normalize_name(term):
    """Return a canonical, upper-cased form of a business name for comparison.

    The name is upper-cased, commas are dropped, whitespace runs are
    collapsed, stand-alone LLC/LLP spellings are rewritten to ``LLC`` and a
    fixed set of abbreviations/phrases is mapped to one canonical spelling.

    Args:
        term: Business name or search string.

    Returns:
        The normalized name without leading or trailing whitespace.
    """
    # Drop commas first so that "L.L.C., INC" and "ACME , LLC" reduce to
    # plain space-separated words before any matching happens.
    term = term.upper().replace(",", "")
    # Pad with a space on both sides so every word (including the first and
    # last) is surrounded by spaces; the padding is stripped on return.
    term = re.sub(r"\s+", " ", " " + term + " ")
    # examples: LLC, LLP, L L C, L.L.C., L.L.P, LLC.
    # Lookarounds require a space on each side without consuming it, so the
    # neighbouring words stay separated and back-to-back suffixes
    # ("L.L.C. L.L.P.") are both rewritten.
    term = re.sub(r"(?<= )L[\s.]?L[\s.]?[PC][.]?(?= )", "LLC", term)
    # Applied in order; keys are whole words or whole phrases.
    word_replace_map = {
        "LIMITED LIABILITY COMPANY": "LLC",
        "LIMITED PARTNERSHIP": "LLC",
        "LTD PS": "LLC",
        "LTD PARTNERSHIP": "LLC",
        "ST": "STREET",
        "AVE": "AVENUE",
        "BLVD": "BOULEVARD",
        "PRPTS": "PROPERTIES",
        "PPTY": "PROPERTY",
        "BLDG": "BUILDING",
        "HLDGS": "HOLDINGS",
        "GRP": "GROUP",
        "INVSTMNTS": "INVESTMENTS",
        "FMLY": "FAMILY",
        "CO": "COMPANY",
        "CORP": "CORPORATION",
        "&": "AND",
        "APT": "APARTMENT",
        "APTS": "APARTMENTS",
    }
    for old, new in word_replace_map.items():
        # Non-consuming space checks let adjacent repeated words
        # ("CO CO") both be replaced, which str.replace(" CO ", ...) misses.
        term = re.sub(r"(?<= )" + re.escape(old) + r"(?= )", new, term)
    return term.strip()
def is_exact_match(row, searchTerm):
    """Return True when the row's business name equals the search term
    once both have been normalized.

    Args:
        row: Record indexable by ``"BusinessName"``.
        searchTerm: The name being searched for.

    Returns:
        Result of comparing ``normalize_name`` of both strings.
    """
    # examples: L.L.C., L.L.P., LLC.
    # NOTE(review): this pattern is unanchored, case-insensitive and requires
    # a trailing period, so it can also fire inside longer words; it is kept
    # only to preserve current results -- confirm whether normalize_name
    # alone is meant to handle the suffix.
    p = re.compile(r"L[\s.]?L[\s,.]?[PC][.]", flags=re.IGNORECASE)
    result = re.sub(p, "LLC", row["BusinessName"])
    search = re.sub(p, "LLC", searchTerm)
    return normalize_name(search) == normalize_name(result)
exact_matches = self._get_empty_df()
potential_matches = self._get_empty_df()