AIEcosystem committed on
Commit
affdf72
·
verified ·
1 Parent(s): 2295697

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +13 -90
src/streamlit_app.py CHANGED
@@ -12,8 +12,6 @@ from streamlit_extras.stylable_container import stylable_container
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
-
16
-
17
  st.markdown(
18
  """
19
  <style>
@@ -57,15 +55,7 @@ st.markdown(
57
  }
58
  </style>
59
  """,
60
- unsafe_allow_html=True
61
- )
62
-
63
-
64
-
65
-
66
-
67
-
68
-
69
  # --- Page Configuration and UI Elements ---
70
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
71
  st.subheader("Business Core", divider="orange")
@@ -79,7 +69,7 @@ Results are presented in easy-to-read tables, visualized in an interactive tree
79
 
80
  **Usage Limits:** You can request results unlimited times for one (1) month.
81
 
82
- **Supported Languages:** English
83
 
84
  **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
85
 
@@ -88,11 +78,7 @@ For any errors or inquiries, please contact us at info@nlpblogs.com""")
88
  with st.sidebar:
89
  st.write("Use the following code to embed the Business Core web app on your website. Feel free to adjust the width and height values to fit your page.")
90
  code = '''
91
- <iframe
92
- src="https://aiecosystem-business-core.hf.space"
93
- frameborder="0"
94
- width="850"
95
- height="450"
96
  ></iframe>
97
  '''
98
  st.code(code, language="html")
@@ -101,67 +87,23 @@ with st.sidebar:
101
  st.divider()
102
  st.subheader("🚀 Ready to build your own AI Web App?", divider="orange")
103
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
104
-
105
  # --- Comet ML Setup ---
106
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
107
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
108
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
109
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
110
-
111
  if not comet_initialized:
112
  st.warning("Comet ML not initialized. Check environment variables.")
113
-
114
  # --- Label Definitions ---
115
-
116
- labels = [
117
- "Person",
118
-
119
- "Contact",
120
- "Company",
121
- "Department",
122
- "Vendor",
123
- "Client",
124
- "Office",
125
- "Warehouse",
126
- "Address",
127
- "City",
128
- "State",
129
- "Country",
130
- "Date",
131
- "Time",
132
- "Time_period",
133
- "Revenue",
134
- "Cost",
135
- "Budget",
136
- "Invoice_number",
137
- "Product",
138
- "Service",
139
- "Task",
140
- "Project",
141
- "Status",
142
- "Asset",
143
- "Transaction"
144
-
145
- ]
146
-
147
-
148
  # Create a mapping dictionary for labels to categories
149
-
150
  category_mapping = {
151
-
152
-
153
- "People": ["Person", "Employee", "Contact"],
154
  "Organizations": ["Company", "Department", "Vendor", "Client"],
155
  "Locations": ["Office", "Warehouse", "Address", "City", "State", "Country"],
156
  "Time & Finance" : ["Date", "Time", "Time_period", "Revenue", "Cost", "Budget", "Invoice_number"],
157
-
158
  "Other Entities": ["Product", "Service", "Task", "Project", "Status", "Asset", "Transaction"],
159
-
160
- }
161
-
162
-
163
-
164
-
165
  # --- Model Loading ---
166
  @st.cache_resource
167
  def load_ner_model():
@@ -172,30 +114,28 @@ def load_ner_model():
172
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
173
  st.stop()
174
  model = load_ner_model()
175
-
176
  # Flatten the mapping to a single dictionary
177
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
178
-
179
  # --- Text Input and Clear Button ---
180
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
181
-
 
 
182
  def clear_text():
183
  """Clears the text area."""
184
  st.session_state['my_text_area'] = ""
185
-
186
  st.button("Clear text", on_click=clear_text)
187
-
188
-
189
  # --- Results Section ---
190
  if st.button("Results"):
191
  start_time = time.time()
192
  if not text.strip():
193
  st.warning("Please enter some text to extract entities.")
 
 
194
  else:
195
  with st.spinner("Extracting entities...", show_time=True):
196
  entities = model.predict_entities(text, labels)
197
  df = pd.DataFrame(entities)
198
-
199
  if not df.empty:
200
  df['category'] = df['label'].map(reverse_category_mapping)
201
  if comet_initialized:
@@ -206,13 +146,10 @@ if st.button("Results"):
206
  )
207
  experiment.log_parameter("input_text", text)
208
  experiment.log_table("predicted_entities", df)
209
-
210
  st.subheader("Grouped Entities by Category", divider = "orange")
211
-
212
  # Create tabs for each category
213
  category_names = sorted(list(category_mapping.keys()))
214
  category_tabs = st.tabs(category_names)
215
-
216
  for i, category_name in enumerate(category_names):
217
  with category_tabs[i]:
218
  df_category_filtered = df[df['category'] == category_name]
@@ -220,9 +157,6 @@ if st.button("Results"):
220
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
221
  else:
222
  st.info(f"No entities found for the '{category_name}' category.")
223
-
224
-
225
-
226
  with st.expander("See Glossary of tags"):
227
  st.write('''
228
  - **text**: ['entity extracted from your text data']
@@ -232,18 +166,15 @@ if st.button("Results"):
232
  - **end**: ['index of the end of the corresponding entity']
233
  ''')
234
  st.divider()
235
-
236
  # Tree map
237
  st.subheader("Tree map", divider = "orange")
238
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
239
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#FFF8F0', plot_bgcolor='#FFF8F0')
240
  st.plotly_chart(fig_treemap)
241
-
242
  # Pie and Bar charts
243
  grouped_counts = df['category'].value_counts().reset_index()
244
  grouped_counts.columns = ['category', 'count']
245
  col1, col2 = st.columns(2)
246
-
247
  with col1:
248
  st.subheader("Pie chart", divider = "orange")
249
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
@@ -253,7 +184,6 @@ if st.button("Results"):
253
  plot_bgcolor='#FFF8F0'
254
  )
255
  st.plotly_chart(fig_pie)
256
-
257
  with col2:
258
  st.subheader("Bar chart", divider = "orange")
259
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
@@ -262,7 +192,6 @@ if st.button("Results"):
262
  plot_bgcolor='#FFF8F0'
263
  )
264
  st.plotly_chart(fig_bar)
265
-
266
  # Most Frequent Entities
267
  st.subheader("Most Frequent Entities", divider="orange")
268
  word_counts = df['text'].value_counts().reset_index()
@@ -277,10 +206,8 @@ if st.button("Results"):
277
  st.plotly_chart(fig_repeating_bar)
278
  else:
279
  st.warning("No entities were found that occur more than once.")
280
-
281
  # Download Section
282
  st.divider()
283
-
284
  dfa = pd.DataFrame(
285
  data={
286
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
@@ -290,7 +217,6 @@ if st.button("Results"):
290
  'accuracy score; how accurately a tag has been assigned to a given entity',
291
  'index of the start of the corresponding entity',
292
  'index of the end of the corresponding entity',
293
-
294
  ]
295
  }
296
  )
@@ -298,7 +224,6 @@ if st.button("Results"):
298
  with zipfile.ZipFile(buf, "w") as myzip:
299
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
300
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
301
-
302
  with stylable_container(
303
  key="download_button",
304
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -309,14 +234,12 @@ if st.button("Results"):
309
  file_name="nlpblogs_results.zip",
310
  mime="application/zip",
311
  )
312
-
313
  if comet_initialized:
314
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
315
  experiment.end()
316
  else: # If df is empty
317
  st.warning("No entities were found in the provided text.")
318
-
319
- end_time = time.time()
320
  elapsed_time = end_time - start_time
321
  st.text("")
322
  st.text("")
 
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
 
 
15
  st.markdown(
16
  """
17
  <style>
 
55
  }
56
  </style>
57
  """,
58
+ unsafe_allow_html=True)
 
 
 
 
 
 
 
 
59
  # --- Page Configuration and UI Elements ---
60
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
61
  st.subheader("Business Core", divider="orange")
 
69
 
70
  **Usage Limits:** You can request results unlimited times for one (1) month.
71
 
72
+ **Supported Languages:** English
73
 
74
  **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
75
 
 
78
  with st.sidebar:
79
  st.write("Use the following code to embed the Business Core web app on your website. Feel free to adjust the width and height values to fit your page.")
80
  code = '''
81
+ <iframe src="https://aiecosystem-business-core.hf.space" frameborder="0" width="850" height="450"
 
 
 
 
82
  ></iframe>
83
  '''
84
  st.code(code, language="html")
 
87
  st.divider()
88
  st.subheader("🚀 Ready to build your own AI Web App?", divider="orange")
89
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
90
  # --- Comet ML Setup ---
91
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
92
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
93
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
94
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
95
  if not comet_initialized:
96
  st.warning("Comet ML not initialized. Check environment variables.")
 
97
  # --- Label Definitions ---
98
+ labels = ["Person","Contact","Company","Department","Vendor","Client","Office","Warehouse","Address","City","State","Country","Date","Time","Time_period","Revenue","Cost","Budget","Invoice_number","Product","Service","Task","Project","Status","Asset","Transaction"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  # Create a mapping dictionary for labels to categories
 
100
  category_mapping = {
101
+ "People": ["Person", "Employee", "Contact"],
 
 
102
  "Organizations": ["Company", "Department", "Vendor", "Client"],
103
  "Locations": ["Office", "Warehouse", "Address", "City", "State", "Country"],
104
  "Time & Finance" : ["Date", "Time", "Time_period", "Revenue", "Cost", "Budget", "Invoice_number"],
 
105
  "Other Entities": ["Product", "Service", "Task", "Project", "Status", "Asset", "Transaction"],
106
+ }
 
 
 
 
 
107
  # --- Model Loading ---
108
  @st.cache_resource
109
  def load_ner_model():
 
114
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
115
  st.stop()
116
  model = load_ner_model()
 
117
  # Flatten the mapping to a single dictionary
118
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
119
  # --- Text Input and Clear Button ---
120
+ word_limit = 200
121
+ text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
122
+ word_count = len(text.split())
123
+ st.markdown(f"**Word count:** {word_count}/{word_limit}")
124
  def clear_text():
125
  """Clears the text area."""
126
  st.session_state['my_text_area'] = ""
 
127
  st.button("Clear text", on_click=clear_text)
 
 
128
  # --- Results Section ---
129
  if st.button("Results"):
130
  start_time = time.time()
131
  if not text.strip():
132
  st.warning("Please enter some text to extract entities.")
133
+ elif word_count > word_limit:
134
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
135
  else:
136
  with st.spinner("Extracting entities...", show_time=True):
137
  entities = model.predict_entities(text, labels)
138
  df = pd.DataFrame(entities)
 
139
  if not df.empty:
140
  df['category'] = df['label'].map(reverse_category_mapping)
141
  if comet_initialized:
 
146
  )
147
  experiment.log_parameter("input_text", text)
148
  experiment.log_table("predicted_entities", df)
 
149
  st.subheader("Grouped Entities by Category", divider = "orange")
 
150
  # Create tabs for each category
151
  category_names = sorted(list(category_mapping.keys()))
152
  category_tabs = st.tabs(category_names)
 
153
  for i, category_name in enumerate(category_names):
154
  with category_tabs[i]:
155
  df_category_filtered = df[df['category'] == category_name]
 
157
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
158
  else:
159
  st.info(f"No entities found for the '{category_name}' category.")
 
 
 
160
  with st.expander("See Glossary of tags"):
161
  st.write('''
162
  - **text**: ['entity extracted from your text data']
 
166
  - **end**: ['index of the end of the corresponding entity']
167
  ''')
168
  st.divider()
 
169
  # Tree map
170
  st.subheader("Tree map", divider = "orange")
171
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
172
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#FFF8F0', plot_bgcolor='#FFF8F0')
173
  st.plotly_chart(fig_treemap)
 
174
  # Pie and Bar charts
175
  grouped_counts = df['category'].value_counts().reset_index()
176
  grouped_counts.columns = ['category', 'count']
177
  col1, col2 = st.columns(2)
 
178
  with col1:
179
  st.subheader("Pie chart", divider = "orange")
180
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
 
184
  plot_bgcolor='#FFF8F0'
185
  )
186
  st.plotly_chart(fig_pie)
 
187
  with col2:
188
  st.subheader("Bar chart", divider = "orange")
189
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
 
192
  plot_bgcolor='#FFF8F0'
193
  )
194
  st.plotly_chart(fig_bar)
 
195
  # Most Frequent Entities
196
  st.subheader("Most Frequent Entities", divider="orange")
197
  word_counts = df['text'].value_counts().reset_index()
 
206
  st.plotly_chart(fig_repeating_bar)
207
  else:
208
  st.warning("No entities were found that occur more than once.")
 
209
  # Download Section
210
  st.divider()
 
211
  dfa = pd.DataFrame(
212
  data={
213
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
 
217
  'accuracy score; how accurately a tag has been assigned to a given entity',
218
  'index of the start of the corresponding entity',
219
  'index of the end of the corresponding entity',
 
220
  ]
221
  }
222
  )
 
224
  with zipfile.ZipFile(buf, "w") as myzip:
225
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
226
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
227
  with stylable_container(
228
  key="download_button",
229
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
234
  file_name="nlpblogs_results.zip",
235
  mime="application/zip",
236
  )
 
237
  if comet_initialized:
238
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
239
  experiment.end()
240
  else: # If df is empty
241
  st.warning("No entities were found in the provided text.")
242
+ end_time = time.time()
 
243
  elapsed_time = end_time - start_time
244
  st.text("")
245
  st.text("")