Upload InternVL2 implementation

Files changed:
- Dockerfile +2 -0
- app_internvl2.py +31 -6
- requirements.txt +2 -1
Dockerfile
CHANGED

@@ -60,6 +60,8 @@ RUN pip3 install --no-cache-dir --upgrade pip && \
     pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 && \
     # Install timm for vision models
     pip3 install --no-cache-dir timm==0.9.11 && \
+    # Install nest-asyncio for handling nested event loops
+    pip3 install --no-cache-dir nest-asyncio==1.5.8 && \
     # Install lmdeploy and its dependencies first
     pip3 install --no-cache-dir "accelerate==0.30.0" && \
     pip3 install --no-cache-dir "lmdeploy==0.5.3" && \
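For context, a minimal sketch of what the new nest-asyncio pin is for (the snippet and the name nested_call are illustrative, not part of the commit): lmdeploy drives an asyncio event loop internally, and when it is invoked from code that is already running inside a loop (for example a Gradio request handler), re-entering the loop normally raises "RuntimeError: This event loop is already running". nest_asyncio.apply() patches the loop so such nested calls can complete.

import asyncio
import nest_asyncio

nest_asyncio.apply()  # make run_until_complete() re-entrant

async def nested_call():
    loop = asyncio.get_running_loop()
    # Without nest_asyncio.apply(), asking the already-running loop to drive
    # another coroutine to completion raises RuntimeError.
    return loop.run_until_complete(asyncio.sleep(0, result="ok"))

print(asyncio.run(nested_call()))  # prints "ok"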
app_internvl2.py
CHANGED

@@ -8,6 +8,11 @@ import warnings
 import stat
 import subprocess
 import sys
+import asyncio
+import nest_asyncio
+
+# Apply nest_asyncio to allow nested event loops
+nest_asyncio.apply()

 # Set environment variables
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

@@ -143,11 +148,14 @@ def load_internvl2_model():
         # Configure for AWQ quantized model
         backend_config = TurbomindEngineConfig(model_format='awq')

-        # Create pipeline
+        # Create pipeline with non-streaming mode to avoid asyncio conflicts
         internvl2_pipeline = pipeline(
             MODEL_ID,
-            backend_config=backend_config,
-            log_level='INFO'
+            backend_config=backend_config,
+            log_level='INFO',
+            model_name_or_path=None,
+            backend_name="turbomind",
+            stream=False  # Important: disable streaming to avoid asyncio issues
         )

         print("InternVL2 model loaded successfully!")

@@ -189,11 +197,28 @@ def analyze_image(image, prompt):
             # If somehow it's already a PIL Image
             image_pil = image.convert('RGB')

-        # Run inference with the model
-        response = internvl2_pipeline((prompt, image_pil))
+        # Run inference with the model, handling event loop manually
+        loop = asyncio.get_event_loop()
+        if loop.is_running():
+            # If we're in a running event loop (like Gradio's),
+            # we need to use run_in_executor for blocking operations
+            print("Using threaded execution for model inference")
+            # Define a function that will run in a separate thread
+            def run_inference():
+                return internvl2_pipeline((prompt, image_pil))
+
+            # Run the inference in a thread pool executor
+            response = loop.run_in_executor(None, run_inference)
+            # Wait for the result
+            if hasattr(response, "result"):
+                response = response.result()
+        else:
+            # Standard synchronous execution
+            print("Using standard execution for model inference")
+            response = internvl2_pipeline((prompt, image_pil))

         # Get the response text
-        result = response.text
+        result = response.text if hasattr(response, "text") else str(response)

         elapsed_time = time.time() - start_time
         return result
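One note on the new inference path in analyze_image (the sketch below is an alternative, not part of the commit; run_blocking_inference and pipeline_fn are illustrative names): loop.run_in_executor(None, run_inference) returns an asyncio.Future, and calling .result() on it before the work has finished raises InvalidStateError rather than waiting. If the intent is simply to keep the blocking lmdeploy call off the event loop thread and wait for its result, a plain concurrent.futures thread pool does that directly:

from concurrent.futures import ThreadPoolExecutor

def run_blocking_inference(pipeline_fn, prompt, image_pil):
    """Run a blocking pipeline call in a worker thread and wait for it."""
    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(pipeline_fn, (prompt, image_pil))
        return future.result()  # blocks until inference completes

# Possible usage inside analyze_image, reusing the objects it already has:
# response = run_blocking_inference(internvl2_pipeline, prompt, image_pil)
# result = response.text if hasattr(response, "text") else str(response)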
requirements.txt
CHANGED

@@ -16,4 +16,5 @@ packaging==23.2
 pyyaml==6.0.1
 tqdm==4.66.1
 typing-extensions==4.10.0
-timm==0.9.11
+timm==0.9.11
+nest-asyncio==1.5.8