Spaces:
Running
Running
Update standardize_location.py
Browse files- standardize_location.py +91 -83
standardize_location.py
CHANGED
|
@@ -1,83 +1,91 @@
|
|
| 1 |
-
import requests
|
| 2 |
-
import re
|
| 3 |
-
import os
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
"
|
| 16 |
-
"
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
country.get("
|
| 45 |
-
country.get("
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import re
|
| 3 |
+
import os
|
| 4 |
+
import model
|
| 5 |
+
# Normalize input
|
| 6 |
+
def normalize_key(text):
    """Return *text* reduced to a compact lookup key.

    The text is stripped and lower-cased, then every character that is not
    an ASCII lowercase letter or digit is removed.
    """
    lowered = text.strip().lower()
    key = re.sub(r"[^a-z0-9]", "", lowered)
    return key
|
| 8 |
+
|
| 9 |
+
# Search for city/place (normal flow)
|
| 10 |
+
def get_country_from_geonames(city_name):
    """Look up the country name for a city/place through the search endpoint.

    The endpoint URL is read from the ``URL_SEARCHJSON`` environment variable
    and the account name from ``USERNAME_GEO``; a missing variable raises
    ``KeyError`` to the caller.

    Args:
        city_name: Free-text place name sent as the ``q`` query parameter.

    Returns:
        The ``countryName`` of the first result, or ``None`` when the response
        has no ``geonames`` entries or the request/parsing fails.
    """
    url = os.environ["URL_SEARCHJSON"]
    username = os.environ["USERNAME_GEO"]
    # The original printed the undefined name `cityname`, which raised
    # NameError on every call before the request was even attempted.
    print("geoname: ", city_name)
    params = {
        "q": city_name,
        "maxRows": 1,  # only the top-ranked match is used
        "username": username,
    }
    try:
        r = requests.get(url, params=params, timeout=5)
        data = r.json()
        if data.get("geonames"):
            return data["geonames"][0]["countryName"]
    except Exception as e:
        # Deliberate best-effort boundary: any network/JSON/shape problem is
        # reported and treated as "no result" so callers can try other lookups.
        print("GeoNames searchJSON error:", e)
    return None
|
| 27 |
+
|
| 28 |
+
# Search for country info using alpha-2/3 codes or name
|
| 29 |
+
def get_country_from_countryinfo(input_code):
    """Resolve a country name or code to the listed country name.

    Fetches the country list from the URL in the ``URL_COUNTRYJSON``
    environment variable (account name from ``USERNAME_GEO``) and compares the
    stripped, upper-cased input against each entry's ``countryName``,
    ``countryCode`` and ``isoAlpha3`` values.

    Args:
        input_code: Country name, alpha-2 code or alpha-3 code.

    Returns:
        The matching entry's ``countryName``, or ``None`` when nothing matches
        or the request/parsing fails.
    """
    endpoint = os.environ["URL_COUNTRYJSON"]
    account = os.environ["USERNAME_GEO"]
    print("countryINFO: ", input_code)
    query = {"username": account}
    try:
        response = requests.get(endpoint, params=query, timeout=5)
        payload = response.json()
        if not payload.get("geonames"):
            return None
        wanted = input_code.strip().upper()
        for entry in payload["geonames"]:
            # Match against country name, country code (alpha-2), iso alpha-3
            candidates = (
                entry.get("countryName", "").upper(),
                entry.get("countryCode", "").upper(),
                entry.get("isoAlpha3", "").upper(),
            )
            if wanted in candidates:
                return entry["countryName"]
    except Exception as e:
        print("GeoNames countryInfoJSON error:", e)
    return None
|
| 52 |
+
|
| 53 |
+
# Combined smart lookup
|
| 54 |
+
def smart_country_lookup(user_input):
    """Resolve free-form location text to a country name.

    Lookup order:
      1. If the input reduces to ``"UK"`` once punctuation is removed
         (e.g. ``"U.K."``), query ``get_country_from_geonames("UK")``.
      2. Try ``get_country_from_countryinfo`` with the raw text (country name,
         alpha-2 or alpha-3 code).
      3. Try ``get_country_from_geonames`` with the raw text as a city/place.

    Only the part before the first ``":"`` is used, so ``"UK: London"`` is
    looked up as ``"UK"``.

    Args:
        user_input: Location text entered by the user.

    Returns:
        The resolved country name, or ``"Not found"``. If any step raises,
        the result of ``model.get_country_from_text(user_input)`` is returned
        instead, unless it is ``"unknown"`` (then ``"Not found"``).
    """
    try:
        raw_input = user_input.strip()
        # Special case: if user writes "UK: London" → split and take main country part
        if ":" in raw_input:
            raw_input = raw_input.split(":")[0].strip()  # only take "UK"
        # Normalize AFTER the split so "UK: London" is recognised as "UK"
        # rather than "UKLONDON".
        normalized = re.sub(r"[^a-zA-Z0-9]", "", raw_input).upper()
        print("raw input for smart country lookup: ",raw_input, ". Normalized country: ", normalized)

        # Punctuation is already stripped, so "U.K" / "U.K." collapse to "UK".
        # NOTE(review): presumably routed to the place search because "UK" is
        # not one of the codes the country-info list matches — confirm.
        if normalized == "UK":
            country = get_country_from_geonames(normalized)
            print("get_country_from_geonames(normalized.upper()) ", country)
            if country:
                return country

        # Country name or alpha-2/alpha-3 code. Called once: the original
        # repeated this identical request for short inputs.
        print(raw_input)
        country = get_country_from_countryinfo(raw_input)
        print("get_country_from_countryinfo(raw_input) ", country)
        if country:
            return country

        # Otherwise, treat as city/place
        country = get_country_from_geonames(raw_input)
        print("get_country_from_geonames(raw_input) ", country)
        if country:
            return country

        return "Not found"
    except Exception:
        # Fallback when a lookup raises (e.g. missing environment variable or
        # non-string input). `Exception` rather than a bare `except` so that
        # KeyboardInterrupt/SystemExit still propagate.
        country = model.get_country_from_text(user_input)
        if country != "unknown":
            return country
        return "Not found"
|