Spaces:

WordLift
/

entity-linking

Running

App Files Files

cyberandy committed on Jul 16

Commit

ad1e1d2

verified ·

1 Parent(s): 25d948c

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -34

app.py CHANGED Viewed

@@ -75,46 +75,75 @@ def load_model(selected_language, model_name=None, entity_set=None):
     # Suppress warnings during model loading
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         try:
-            if selected_language == "German" or selected_language == "English - spaCy":
-                # ... (your existing spaCy loading logic)
             else:
                 try:
-                    # Attempt to load the pretrained model directly
-                    refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
-                    return refined_model
-                except AttributeError as e:
                     if "add_special_tokens" in str(e):
-                        st.warning("Encountered 'add_special_tokens' conflict. Attempting to fix by modifying tokenizer config...")
-                        # Define a local directory to save the model
-                        local_model_dir = f"./{model_name}_{entity_set}"
-                        # Download and save the tokenizer, then modify its config
                         tokenizer = AutoTokenizer.from_pretrained(model_name)
-                        tokenizer.save_pretrained(local_model_dir)
-                        # Load the tokenizer_config.json and remove the conflicting key
-                        tokenizer_config_path = f"{local_model_dir}/tokenizer_config.json"
-                        with open(tokenizer_config_path, 'r') as f:
-                            config = json.load(f)
-                        if "add_special_tokens" in config:
-                            del config["add_special_tokens"]
-                        with open(tokenizer_config_path, 'w') as f:
-                            json.dump(config, f, indent=2)
-                        # Download and save the model
-                        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-                        model.save_pretrained(local_model_dir)
-                        # Load the model from the modified local directory
-                        refined_model = Refined.from_pretrained(model_name=local_model_dir, entity_set=entity_set)
-                        st.success("Successfully loaded model after applying fix.")
-                        return refined_model
                     else:
-                        raise e  # Re-raise other AttributeError exceptions
         except Exception as e:
             st.error(f"Error loading model: {e}")
             return None

     # Suppress warnings during model loading
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         try:
+            # This block handles the spaCy models for German and English
+            if selected_language == "German":
+                try:
+                    nlp_model_de = spacy.load("de_core_news_lg")
+                except OSError:
+                    st.info("Downloading German language model... This may take a moment.")
+                    spacy.cli.download("de_core_news_lg")
+                    nlp_model_de = spacy.load("de_core_news_lg")
+                if "entityfishing" not in nlp_model_de.pipe_names:
+                    try:
+                        nlp_model_de.add_pipe("entityfishing")
+                    except Exception as e:
+                        st.warning(f"Entity-fishing not available, using basic NER only: {e}")
+                return nlp_model_de
+            elif selected_language == "English - spaCy":
+                try:
+                    nlp_model_en = spacy.load("en_core_web_sm")
+                except OSError:
+                    st.info("Downloading English language model... This may take a moment.")
+                    spacy.cli.download("en_core_web_sm")
+                    nlp_model_en = spacy.load("en_core_web_sm")
+                if "entityfishing" not in nlp_model_en.pipe_names:
+                    try:
+                        nlp_model_en.add_pipe("entityfishing")
+                    except Exception as e:
+                        st.warning(f"Entity-fishing not available, using basic NER only: {e}")
+                return nlp_model_en
+            # This block handles the ReFinED model and the "add_special_tokens" error
             else:
                 try:
+                    # First, attempt to load the model as usual
+                    return Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
+                except Exception as e:
+                    # If the specific "add_special_tokens" error occurs, apply the fix
                     if "add_special_tokens" in str(e):
+                        st.warning("Conflict detected. Applying fix by modifying tokenizer config...")
+                        # Define a local path to save/load the fixed model
+                        local_model_path = f"./{model_name}-{entity_set}-fixed"
+                        # Download tokenizer, modify config, and save locally
                         tokenizer = AutoTokenizer.from_pretrained(model_name)
+                        tokenizer.save_pretrained(local_model_path)
+                        config_path = os.path.join(local_model_path, "tokenizer_config.json")
+                        with open(config_path, "r") as f:
+                            config_data = json.load(f)
+                        # Remove the conflicting parameter
+                        config_data.pop("add_special_tokens", None)
+                        with open(config_path, "w") as f:
+                            json.dump(config_data, f, indent=2)
+                        # Now, load the model from the local, fixed path
+                        st.success("Fix applied. Loading model from local cache.")
+                        return Refined.from_pretrained(model_name=local_model_path, entity_set=entity_set)
                     else:
+                        # If it's a different error, raise it
+                        raise e
         except Exception as e:
             st.error(f"Error loading model: {e}")
             return None