Spaces:
Running
Running
Update pipeline.py
Browse files
- pipeline.py +3 -0
pipeline.py
CHANGED
|
@@ -200,6 +200,7 @@ def pipeline_with_gemini(accessions):
|
|
| 200 |
# first way: ncbi method
|
| 201 |
if country.lower() != "unknown":
|
| 202 |
stand_country = standardize_location.smart_country_lookup(country.lower())
|
|
|
|
| 203 |
if stand_country.lower() != "not found":
|
| 204 |
acc_score["country"][stand_country.lower()] = ["ncbi"]
|
| 205 |
else: acc_score["country"][country.lower()] = ["ncbi"]
|
|
@@ -211,6 +212,7 @@ def pipeline_with_gemini(accessions):
|
|
| 211 |
acc_score["sample_type"][sample_type.lower()] = ["ncbi"]
|
| 212 |
# second way: LLM model
|
| 213 |
# Preprocess the input token
|
|
|
|
| 214 |
accession, isolate = None, None
|
| 215 |
if acc != "unknown": accession = acc
|
| 216 |
if iso != "unknown": isolate = iso
|
|
@@ -218,6 +220,7 @@ def pipeline_with_gemini(accessions):
|
|
| 218 |
if doi != "unknown":
|
| 219 |
link = 'https://doi.org/' + doi
|
| 220 |
# get the file to create listOfFile for each id
|
|
|
|
| 221 |
html = extractHTML.HTML("",link)
|
| 222 |
jsonSM = html.getSupMaterial()
|
| 223 |
article_text = html.getListSection()
|
|
|
|
| 200 |
# first way: ncbi method
|
| 201 |
if country.lower() != "unknown":
|
| 202 |
stand_country = standardize_location.smart_country_lookup(country.lower())
|
| 203 |
+
print("stand_country: ", stand_country)
|
| 204 |
if stand_country.lower() != "not found":
|
| 205 |
acc_score["country"][stand_country.lower()] = ["ncbi"]
|
| 206 |
else: acc_score["country"][country.lower()] = ["ncbi"]
|
|
|
|
| 212 |
acc_score["sample_type"][sample_type.lower()] = ["ncbi"]
|
| 213 |
# second way: LLM model
|
| 214 |
# Preprocess the input token
|
| 215 |
+
print(acc_score)
|
| 216 |
accession, isolate = None, None
|
| 217 |
if acc != "unknown": accession = acc
|
| 218 |
if iso != "unknown": isolate = iso
|
|
|
|
| 220 |
if doi != "unknown":
|
| 221 |
link = 'https://doi.org/' + doi
|
| 222 |
# get the file to create listOfFile for each id
|
| 223 |
+
print("link of doi: ", link)
|
| 224 |
html = extractHTML.HTML("",link)
|
| 225 |
jsonSM = html.getSupMaterial()
|
| 226 |
article_text = html.getListSection()
|