| import gradio as gr | |
| import json | |
| from mistralai import Mistral | |
| from pydantic import BaseModel, Field | |
| from datetime import datetime | |
| import base64 | |
| from io import BytesIO | |
| from PIL import Image | |
| import os | |
class ClimateData(BaseModel):
    # Document-level schema: one record describing a whole climate document.
    # It is passed to the OCR request as the document annotation format.
    # Documented with comments rather than a class docstring on purpose:
    # pydantic copies a model docstring into the generated JSON schema.

    # --- Document identity (required) ---
    document_type: str = Field(
        ...,
        description="Type of document: report, policy, research, assessment, etc.",
    )
    title: str = Field(
        ...,
        description="Document title or main heading",
    )
    organization: str = Field(
        ...,
        description="Publishing organization or agency",
    )
    publication_date: str = Field(
        ...,
        description="Publication or release date",
    )

    # --- Physical measurements (optional, empty list when absent) ---
    temperature_data: list[str] = Field(
        default=[],
        description="Temperature readings, anomalies, projections",
    )
    precipitation_data: list[str] = Field(
        default=[],
        description="Precipitation measurements and forecasts",
    )
    co2_levels: list[str] = Field(
        default=[],
        description="CO2 concentration data and emissions",
    )
    sea_level_data: list[str] = Field(
        default=[],
        description="Sea level rise measurements",
    )
    extreme_events: list[str] = Field(
        default=[],
        description="Extreme weather events and frequencies",
    )

    # --- Time coverage ---
    year_ranges: list[str] = Field(
        default=[],
        description="Time periods and date ranges covered",
    )
    baseline_periods: list[str] = Field(
        default=[],
        description="Reference or baseline periods used",
    )
    projection_periods: list[str] = Field(
        default=[],
        description="Future projection timeframes",
    )

    # --- Policy content ---
    policy_recommendations: list[str] = Field(
        default=[],
        description="Policy recommendations and actions",
    )
    targets_goals: list[str] = Field(
        default=[],
        description="Climate targets, goals, and commitments",
    )
    mitigation_strategies: list[str] = Field(
        default=[],
        description="Mitigation approaches and strategies",
    )
    adaptation_measures: list[str] = Field(
        default=[],
        description="Adaptation measures and plans",
    )

    # --- Scope ---
    regions_covered: list[str] = Field(
        default=[],
        description="Geographical regions or countries covered",
    )
    sectors_affected: list[str] = Field(
        default=[],
        description="Economic sectors or systems affected",
    )

    # --- Findings ---
    main_conclusions: list[str] = Field(
        default=[],
        description="Primary conclusions and findings",
    )
    risk_assessments: list[str] = Field(
        default=[],
        description="Risk levels and assessments",
    )
    uncertainty_levels: list[str] = Field(
        default=[],
        description="Uncertainty ranges and confidence levels",
    )
class ChartDescription(BaseModel):
    # Per-figure schema: describes a single chart, map, or table found in a
    # document. It is passed to the OCR request as the bbox annotation format.
    # Documented with comments rather than a class docstring on purpose:
    # pydantic copies a model docstring into the generated JSON schema.

    # What kind of figure it is and what it plots.
    chart_type: str = Field(
        ...,
        description="Type of visualization: line chart, bar chart, map, table, etc.",
    )
    data_type: str = Field(
        ...,
        description="Type of data shown: temperature, emissions, policy timeline, etc.",
    )

    # What the figure says.
    trend_description: str = Field(
        ...,
        description="Description of trends, patterns, and changes",
    )
    key_insights: str = Field(
        ...,
        description="Important findings and takeaways from the visualization",
    )

    # Where and when the plotted data applies.
    time_period: str = Field(
        ...,
        description="Time period or range covered in the chart",
    )
    geographical_scope: str = Field(
        ...,
        description="Geographical area or regions shown",
    )
def initialize_client(api_key):
    """Build a Mistral API client from a user-supplied key.

    Args:
        api_key: The Mistral API key. Leading and trailing whitespace is
            ignored, so a key pasted with a stray space or newline still works.

    Returns:
        A ``Mistral`` client configured with the cleaned key.

    Raises:
        ValueError: If ``api_key`` is missing, empty, or only whitespace.
    """
    # Only strings can be stripped; any other falsy value (e.g. None) falls
    # through to the same validation error below.
    cleaned_key = api_key.strip() if isinstance(api_key, str) else api_key
    if not cleaned_key:
        raise ValueError("Please provide a valid Mistral API key")
    return Mistral(api_key=cleaned_key)
def _join_page_markdown(response):
    """Concatenate the markdown text of every OCR page into one string."""
    pages = getattr(response, "pages", None) or []
    page_texts = [getattr(page, "markdown", "") or "" for page in pages]
    return "\n\n".join(text for text in page_texts if text)


def _collect_image_annotations(response):
    """Gather the per-image annotations attached to the images of every OCR page."""
    annotations = []
    for page in getattr(response, "pages", None) or []:
        for image in getattr(page, "images", None) or []:
            annotation = getattr(image, "image_annotation", None)
            if annotation:
                annotations.append(annotation)
    return annotations


def extract_climate_data(api_key, file_path=None, url=None):
    """Run Mistral OCR on a document and return its text and annotations.

    The document comes from ``file_path`` (uploaded to Mistral first, then
    referenced through a signed URL) or, when no file is given, from ``url``.

    Args:
        api_key: Mistral API key used to create the client.
        file_path: Path of a local document to upload. Takes precedence
            over ``url``.
        url: URL of a document to process directly.

    Returns:
        On success, a dict with the keys ``success``, ``extracted_text``,
        ``climate_data``, ``chart_descriptions`` and ``raw_response``.
        On any failure, a dict with a single ``error`` key. This function
        reports problems through that dict instead of raising.
    """
    # The broad ``except`` below is deliberate: callers display the returned
    # dict directly, so every failure is converted into an ``error`` entry.
    try:
        client = initialize_client(api_key)
        from mistralai.extra import response_format_from_pydantic_model

        if file_path:
            # Keep the handle open only for the duration of the upload so it
            # is closed even when the upload fails.
            with open(file_path, "rb") as document_file:
                uploaded_file = client.files.upload(
                    file={
                        "file_name": os.path.basename(file_path),
                        "content": document_file,
                    },
                    purpose="ocr",
                )
            signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
            document_url = signed_url.url
        elif url:
            document_url = url
        else:
            return {"error": "No file or URL provided"}

        response = client.ocr.process(
            model="mistral-ocr-latest",
            document={"type": "document_url", "document_url": document_url},
            bbox_annotation_format=response_format_from_pydantic_model(ChartDescription),
            document_annotation_format=response_format_from_pydantic_model(ClimateData),
            include_image_base64=True,
        )

        # Prefer top-level attributes when the response has them; otherwise
        # fall back to the per-page structure (page markdown, and the
        # annotation attached to each extracted image).
        extracted_text = getattr(response, "text", None) or _join_page_markdown(response)
        bbox_annotations = (
            getattr(response, "bbox_annotations", None)
            or _collect_image_annotations(response)
        )
        doc_annotations = getattr(response, "document_annotation", {})

        return {
            "success": True,
            "extracted_text": extracted_text,
            "climate_data": doc_annotations,
            "chart_descriptions": bbox_annotations,
            "raw_response": str(response),
        }
    except Exception as e:
        return {"error": f"OCR processing failed: {str(e)}"}
def process_climate_document(api_key, file, url_input):
    """
    Extract structured climate data from an uploaded document or from a document URL.
    An uploaded file takes precedence over the URL when both are supplied.

    Args:
        api_key: Mistral API key used to authenticate the OCR request.
        file: Uploaded document, given as a file path or as an object exposing the path as .name
        url_input: URL of the document to analyze, used only when no file is provided.

    Returns:
        A dict with climate_data, chart_descriptions and extracted_text, or a dict with an error message.
    """
    # Tolerate a missing URL (None) as well as an empty or blank string.
    document_url = (url_input or "").strip()

    if file:
        # Accept both a plain path string and a file-like wrapper with `.name`.
        document_path = getattr(file, "name", file)
        result = extract_climate_data(api_key, file_path=document_path)
    elif document_url:
        result = extract_climate_data(api_key, url=document_url)
    else:
        return {"error": "Please provide either a file or URL"}

    # On failure, surface only the error message and drop any other keys.
    if "error" in result:
        return {"error": result["error"]}
    return result
def _placeholder_analysis(image_type, placeholder, description):
    """Build a result dict with the standard keys for when no real analysis is available."""
    return {
        "image_type": image_type,
        "climate_feature": placeholder,
        "location": placeholder,
        # "%d" is the day-of-month directive; a literal "DD" would be emitted
        # verbatim and produce dates such as "2024-05-DD".
        "date_captured": datetime.now().strftime("%Y-%m-%d"),
        "cloud_density": 0.0,
        "temperature_anomaly": 0.0,
        "description": description,
    }


def analyze_image(api_key, image):
    """
    Analyze a climate-related image with the Pixtral vision model and return the findings as JSON.

    Args:
        api_key: Mistral API key used to authenticate the chat request.
        image: PIL image to analyze.

    Returns:
        A dict with image_type, climate_feature, location, date_captured, cloud_density, temperature_anomaly and description.
    """
    # Every failure (bad key, missing image, API error) is reported through a
    # placeholder result rather than an exception, so callers always get a dict.
    try:
        client = initialize_client(api_key)

        # Encode the image as base64 PNG so it can travel inline as a data URL.
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        prompt = """Analyze this image and provide a JSON output with the following fields:
        - image_type: Type of image (e.g., satellite, ground, aerial)
        - climate_feature: Primary climate feature observed (e.g., cloud_cover, precipitation)
        - location: Estimated or general location (e.g., Pacific Ocean, Sahara Desert)
        - date_captured: Current date in YYYY-MM-DD format
        - cloud_density: Estimated cloud density (0.0 to 1.0) if applicable
        - temperature_anomaly: Estimated temperature anomaly in Celsius (e.g., 1.2)
        - description: Brief description of the image content
        """
        response = client.chat.complete(
            model="pixtral-large-latest",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": f"data:image/png;base64,{img_str}"},
                    ],
                }
            ],
        )
        response_text = response.choices[0].message.content

        try:
            # Strip Markdown code fences the model may wrap around its JSON.
            response_text = response_text.replace("```json", "").replace("```", "").strip()
            return json.loads(response_text)
        except json.JSONDecodeError:
            return _placeholder_analysis("unknown", "unknown", "Error parsing model output.")
    except Exception as e:
        return _placeholder_analysis("error", "none", f"Error processing image: {str(e)}")
# Gradio UI: one shared API-key field and two tabs, one for document analysis
# (file upload or URL) and one for image analysis.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened -- confirm the layout against the running app.
with gr.Blocks(title="Climate Data and Image Analyzer") as demo:
    gr.Markdown("# Climate Data and Image Analysis Tool\nAnalyze climate documents or images using Mistral OCR and Pixtral models")
    # The key is entered once and passed as the first input of both handlers.
    api_key_input = gr.Textbox(
        label="Mistral API Key",
        placeholder="Enter your Mistral API key here",
        type="password"
    )
    with gr.Tabs():
        # --- Tab 1: document analysis ---
        with gr.Tab(label="Document Analysis"):
            gr.Markdown("## Document Analysis\nExtract data from climate reports, policies, or research papers")
            with gr.Row():
                # Left column: inputs and the trigger button.
                with gr.Column():
                    file_input = gr.File(
                        label="Upload Climate Document",
                        file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".pptx"]
                    )
                    url_input = gr.Textbox(
                        label="Or Enter Document URL",
                        placeholder="https://example.com/climate-policy.pdf"
                    )
                    process_btn = gr.Button("Analyze Document", variant="primary")
                # Right column: JSON view of the result dict.
                with gr.Column():
                    doc_output = gr.JSON(label="Document Analysis Results")
            process_btn.click(
                fn=process_climate_document,
                inputs=[api_key_input, file_input, url_input],
                outputs=doc_output
            )
            # Each example row is [file, url]; the file slot is left empty so
            # the example only fills the URL box.
            gr.Examples(
                examples=[
                    [None, "https://static.pib.gov.in/WriteReadData/specificdocs/documents/2021/dec/doc202112101.pdf"],
                    [None, "https://www.ipcc.ch/site/assets/uploads/2018/02/WG1AR5_Chapter02_FINAL.pdf"],
                    [None, "https://unfccc.int/sites/default/files/resource/parisagreement_publication.pdf"]
                ],
                inputs=[file_input, url_input]
            )
        # --- Tab 2: image analysis ---
        with gr.Tab(label="Image Analysis"):
            gr.Markdown("## Image Analysis\nAnalyze climate-related images for features like cloud cover or temperature anomalies")
            # type="pil" hands the handler a PIL image, which analyze_image
            # saves as PNG before sending.
            image_input = gr.Image(type="pil", label="Upload Image")
            image_btn = gr.Button("Analyze Image", variant="primary")
            image_output = gr.JSON(label="Image Analysis Result")
            image_btn.click(
                fn=analyze_image,
                inputs=[api_key_input, image_input],
                outputs=image_output
            )
# Launch only when run as a script; mcp_server=True is passed to launch().
if __name__ == "__main__":
    demo.launch(mcp_server=True)