n0v33n committed on
Commit
9bfda2f
·
1 Parent(s): 27e696a

updated files

Browse files
Files changed (1) hide show
  1. app.py +222 -0
app.py CHANGED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ from mistralai import Mistral
4
+ from pydantic import BaseModel, Field
5
+ from datetime import datetime
6
+ import base64
7
+ from io import BytesIO
8
+ from PIL import Image
9
+
10
def _text_list(description):
    # Optional list-of-strings field: empty by default, documented by
    # *description*.
    return Field(default=[], description=description)


class ClimateData(BaseModel):
    # Document-level annotation schema handed to the OCR call as
    # `document_annotation_format`.  Documented with comments rather than a
    # docstring so the schema generated from this class stays unchanged.

    # --- Identification (required) ---
    document_type: str = Field(..., description="Type of document: report, policy, research, assessment, etc.")
    title: str = Field(..., description="Document title or main heading")
    organization: str = Field(..., description="Publishing organization or agency")
    publication_date: str = Field(..., description="Publication or release date")

    # --- Measurements ---
    temperature_data: list[str] = _text_list("Temperature readings, anomalies, projections")
    precipitation_data: list[str] = _text_list("Precipitation measurements and forecasts")
    co2_levels: list[str] = _text_list("CO2 concentration data and emissions")
    sea_level_data: list[str] = _text_list("Sea level rise measurements")
    extreme_events: list[str] = _text_list("Extreme weather events and frequencies")

    # --- Time frames ---
    year_ranges: list[str] = _text_list("Time periods and date ranges covered")
    baseline_periods: list[str] = _text_list("Reference or baseline periods used")
    projection_periods: list[str] = _text_list("Future projection timeframes")

    # --- Policy content ---
    policy_recommendations: list[str] = _text_list("Policy recommendations and actions")
    targets_goals: list[str] = _text_list("Climate targets, goals, and commitments")
    mitigation_strategies: list[str] = _text_list("Mitigation approaches and strategies")
    adaptation_measures: list[str] = _text_list("Adaptation measures and plans")

    # --- Scope ---
    regions_covered: list[str] = _text_list("Geographical regions or countries covered")
    sectors_affected: list[str] = _text_list("Economic sectors or systems affected")

    # --- Findings ---
    main_conclusions: list[str] = _text_list("Primary conclusions and findings")
    risk_assessments: list[str] = _text_list("Risk levels and assessments")
    uncertainty_levels: list[str] = _text_list("Uncertainty ranges and confidence levels")
32
+
33
class ChartDescription(BaseModel):
    # Per-figure annotation schema handed to the OCR call as
    # `bbox_annotation_format`.  Every field is a required string.

    # What kind of visual this is and what quantity it shows.
    chart_type: str = Field(..., description="Type of visualization: line chart, bar chart, map, table, etc.")
    data_type: str = Field(..., description="Type of data shown: temperature, emissions, policy timeline, etc.")
    # Free-text interpretation of the visual.
    trend_description: str = Field(..., description="Description of trends, patterns, and changes")
    key_insights: str = Field(..., description="Important findings and takeaways from the visualization")
    # Temporal and spatial coverage of the visual.
    time_period: str = Field(..., description="Time period or range covered in the chart")
    geographical_scope: str = Field(..., description="Geographical area or regions shown")
40
+
41
def initialize_client(api_key):
    """Build a Mistral client for *api_key*.

    Raises:
        ValueError: if *api_key* is empty or otherwise falsy.
    """
    if api_key:
        return Mistral(api_key=api_key)
    raise ValueError("Please provide a valid Mistral API key")
45
+
46
def extract_climate_data(api_key, file_path=None, url=None):
    """Run Mistral OCR with structured annotations over one climate document.

    ``file_path`` takes precedence: the file is uploaded to Mistral and the
    resulting signed URL is processed.  Otherwise ``url`` is passed to the OCR
    endpoint as-is.

    Args:
        api_key: Mistral API key used to build the client.
        file_path: Local path of the document to upload, or ``None``.
        url: URL of the document to process, or ``None``.

    Returns:
        On success, a dict with the keys ``success``, ``extracted_text``,
        ``climate_data``, ``chart_descriptions`` and ``raw_response``.
        On any failure, a dict with a single ``error`` key.  This function
        does not raise; every exception is converted into that error dict.
    """
    # `os` is not imported at module level, so bring it in here; without it
    # every file upload died with a NameError hidden by the handler below.
    import os

    try:
        client = initialize_client(api_key)
        from mistralai.extra import response_format_from_pydantic_model

        if file_path:
            # Close the handle as soon as the upload call returns instead of
            # leaking one descriptor per request.
            with open(file_path, "rb") as handle:
                uploaded_file = client.files.upload(
                    file={"file_name": os.path.basename(file_path), "content": handle},
                    purpose="ocr",
                )
            signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
            document_url = signed_url.url
        elif url:
            document_url = url
        else:
            return {"error": "No file or URL provided"}

        response = client.ocr.process(
            model="mistral-ocr-latest",
            document={"type": "document_url", "document_url": document_url},
            bbox_annotation_format=response_format_from_pydantic_model(ChartDescription),
            document_annotation_format=response_format_from_pydantic_model(ClimateData),
            include_image_base64=True,
        )

        # NOTE(review): the OCR response is expected to carry its content per
        # page (page.markdown, page.images[].image_annotation) rather than in
        # top-level `text` / `bbox_annotations` attributes -- confirm against
        # the installed mistralai version.  The top-level attributes are still
        # honoured first; the per-page data is only a fallback.
        pages = getattr(response, "pages", None) or []

        extracted_text = getattr(response, "text", None) or ""
        if not extracted_text:
            page_texts = (getattr(page, "markdown", None) for page in pages)
            extracted_text = "\n\n".join(text for text in page_texts if text)

        bbox_annotations = getattr(response, "bbox_annotations", None) or []
        if not bbox_annotations:
            for page in pages:
                for image in getattr(page, "images", None) or []:
                    annotation = getattr(image, "image_annotation", None)
                    if annotation:
                        bbox_annotations.append(annotation)

        doc_annotations = getattr(response, "document_annotation", None) or {}

        return {
            "success": True,
            "extracted_text": extracted_text,
            "climate_data": doc_annotations,
            "chart_descriptions": bbox_annotations,
            "raw_response": str(response),
        }
    except Exception as e:
        # UI boundary: report the failure as data so the caller can show it.
        return {"error": f"OCR processing failed: {str(e)}"}
80
+
81
def _parse_annotation(value):
    """Return *value* decoded to a dict when it is JSON object text, else unchanged."""
    if not isinstance(value, str):
        return value
    try:
        decoded = json.loads(value)
    except json.JSONDecodeError:
        return value
    return decoded if isinstance(decoded, dict) else value


def process_climate_document(api_key, file, url_input):
    """Analyze an uploaded file or a URL and render the result as Markdown.

    Args:
        api_key: Mistral API key forwarded to ``extract_climate_data``.
        file: Uploaded file (an object with a ``name`` path attribute, or a
            plain path string), or ``None``.  Takes precedence over the URL.
        url_input: Document URL text; ``None``, empty and whitespace-only
            values all count as "no URL".

    Returns:
        A Markdown string: the analysis report, an ``Error: ...`` line when
        extraction failed, or a prompt to supply an input when neither a file
        nor a URL was given.
    """
    # Treat a missing textbox value like an empty one instead of crashing
    # on None.strip().
    url = (url_input or "").strip()

    if file:
        # Accept both a tempfile-like upload object and a bare path string.
        file_path = getattr(file, "name", file)
        result = extract_climate_data(api_key, file_path=file_path)
    elif url:
        result = extract_climate_data(api_key, url=url)
    else:
        return "Please provide either a file or URL"

    if "error" in result:
        return f"Error: {result['error']}"

    sections = ["# Climate Document Analysis Results\n\n"]

    # Annotations may arrive as JSON text; decode so the overview can be built.
    data = _parse_annotation(result.get("climate_data"))
    if data:
        if isinstance(data, dict):
            sections.append("## Document Overview:\n")
            sections.append(f"**Type:** {data.get('document_type', 'N/A')}\n")
            sections.append(f"**Title:** {data.get('title', 'N/A')}\n")
            sections.append(f"**Organization:** {data.get('organization', 'N/A')}\n")
            sections.append(f"**Date:** {data.get('publication_date', 'N/A')}\n\n")
            sections.append("## Complete Structured Data:\n")
            sections.append(f"```json\n{json.dumps(data, indent=2)}\n```\n\n")
        else:
            sections.append("## Extracted Climate Data:\n")
            sections.append(f"```\n{str(data)}\n```\n\n")

    charts = result.get("chart_descriptions")
    if charts:
        sections.append("## Chart Analysis:\n")
        if isinstance(charts, list):
            for i, chart in enumerate(charts, 1):
                chart = _parse_annotation(chart)
                if isinstance(chart, dict):
                    rendered = json.dumps(chart, indent=2)
                else:
                    rendered = str(chart)
                sections.append(f"### Chart {i}:\n{rendered}\n\n")
        else:
            sections.append(f"```\n{str(charts)}\n```\n\n")

    text = result.get("extracted_text")
    if text:
        sections.append("## Extracted Text:\n")
        sections.append(f"{text}...\n\n")

    return "".join(sections)
119
+
120
def _image_result_stub(image_type, placeholder, description):
    """Build the fixed-shape result used when no usable model output exists."""
    return {
        "image_type": image_type,
        "climate_feature": placeholder,
        "location": placeholder,
        # "%d" is the day-of-month directive; the former "%Y-%m-DD" emitted a
        # literal "DD" instead of the day.
        "date_captured": datetime.now().strftime("%Y-%m-%d"),
        "cloud_density": 0.0,
        "temperature_anomaly": 0.0,
        "description": description,
    }


def analyze_image(api_key, image):
    """Ask the Pixtral model to describe a climate-related image as JSON.

    Args:
        api_key: Mistral API key used to build the client.
        image: Image object exposing ``save(buffer, format=...)`` (the UI
            supplies a PIL image), or ``None`` when nothing was uploaded.

    Returns:
        A JSON string (indent 2).  It holds the model's parsed answer when the
        answer is valid JSON, a placeholder record with ``"unknown"`` values
        when it is not, or a record with ``image_type == "error"`` when any
        step raised.  This function does not raise.
    """
    try:
        client = initialize_client(api_key)
        if image is None:
            # Fail with a readable message rather than an AttributeError.
            raise ValueError("No image provided")

        # Ship the image inline as a base64 PNG data URL.
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        prompt = """Analyze this image and provide a JSON output with the following fields:
- image_type: Type of image (e.g., satellite, ground, aerial)
- climate_feature: Primary climate feature observed (e.g., cloud_cover, precipitation)
- location: Estimated or general location (e.g., Pacific Ocean, Sahara Desert)
- date_captured: Current date in YYYY-MM-DD format
- cloud_density: Estimated cloud density (0.0 to 1.0) if applicable
- temperature_anomaly: Estimated temperature anomaly in Celsius (e.g., 1.2)
- description: Brief description of the image content
"""
        response = client.chat.complete(
            model="pixtral-large-latest",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": f"data:image/png;base64,{img_str}"},
                    ],
                }
            ],
        )
        response_text = response.choices[0].message.content
        try:
            # Strip Markdown code fences the model may wrap around its JSON.
            response_text = response_text.replace("```json", "").replace("```", "").strip()
            result = json.loads(response_text)
        except json.JSONDecodeError:
            result = _image_result_stub("unknown", "unknown", "Error parsing model output.")
        return json.dumps(result, indent=2)
    except Exception as e:
        # UI boundary: surface the failure in the same JSON shape.
        error_result = _image_result_stub("error", "none", f"Error processing image: {str(e)}")
        return json.dumps(error_result, indent=2)
173
+
174
# Gradio front end: one shared API-key box above two tabs, one for document
# OCR analysis and one for single-image analysis.
with gr.Blocks(title="Climate Data and Image Analyzer") as demo:
    gr.Markdown("# 🌍 Climate Data and Image Analysis Tool\nAnalyze climate documents or images using Mistral OCR and Pixtral models")

    # The key is entered once and passed to both analysis callbacks.
    api_key_input = gr.Textbox(
        type="password",
        label="Mistral API Key",
        placeholder="Enter your Mistral API key here",
    )

    with gr.Tabs():
        # ---- Tab 1: documents (upload or URL) -> Markdown report ----
        with gr.Tab(label="Document Analysis"):
            gr.Markdown("## Document Analysis\nExtract data from climate reports, policies, or research papers")
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(
                        file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".pptx"],
                        label="Upload Climate Document",
                    )
                    url_input = gr.Textbox(
                        placeholder="https://example.com/climate-policy.pdf",
                        label="Or Enter Document URL",
                    )
                    process_btn = gr.Button("Analyze Document", variant="primary")
                with gr.Column():
                    doc_output = gr.Markdown(label="Document Analysis Results")

            process_btn.click(
                process_climate_document,
                inputs=[api_key_input, file_input, url_input],
                outputs=doc_output,
            )

            # Sample documents; each row leaves the file slot empty and only
            # fills in the URL box.
            gr.Examples(
                inputs=[file_input, url_input],
                examples=[
                    [None, link]
                    for link in (
                        "https://static.pib.gov.in/WriteReadData/specificdocs/documents/2021/dec/doc202112101.pdf",
                        "https://www.ipcc.ch/site/assets/uploads/2018/02/WG1AR5_Chapter02_FINAL.pdf",
                        "https://unfccc.int/sites/default/files/resource/parisagreement_publication.pdf",
                    )
                ],
            )

        # ---- Tab 2: single image -> JSON description ----
        with gr.Tab(label="Image Analysis"):
            gr.Markdown("## Image Analysis\nAnalyze climate-related images for features like cloud cover or temperature anomalies")
            image_input = gr.Image(label="Upload Image", type="pil")
            image_btn = gr.Button("Analyze Image", variant="primary")
            image_output = gr.JSON(label="Image Analysis Result")
            image_btn.click(
                analyze_image,
                inputs=[api_key_input, image_input],
                outputs=image_output,
            )

# Start the app (with the MCP server enabled) only when run as a script.
if __name__ == "__main__":
    demo.launch(mcp_server=True)