TiniThingsInc committed on
Commit
d49b11e
·
verified Β·
1 Parent(s): 2114026
Files changed (1) hide show
  1. app.py +78 -34
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- FairFate Embeddings API - Qwen3-Embedding-0.6B
3
  Multilingual semantic embeddings for tabletop RPG product classification
4
  """
5
 
@@ -10,12 +10,15 @@ from typing import List, Union
10
  import spaces # ZeroGPU decorator
11
 
12
  # Load model once at startup
13
- MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"
 
 
14
  print(f"πŸ”„ Loading model: {MODEL_NAME}")
15
  model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
16
  print(f"βœ… Model loaded successfully")
17
- print(f" Dimensions: {model.get_sentence_embedding_dimension()}")
18
  print(f" Max Seq Length: {model.max_seq_length}")
 
19
 
20
  # Optional: Add instruction prefix for RPG domain (improves accuracy by 1-5%)
21
  INSTRUCTION_PREFIX = "Represent this tabletop RPG product for semantic search: "
@@ -24,22 +27,24 @@ INSTRUCTION_PREFIX = "Represent this tabletop RPG product for semantic search: "
24
  def generate_embeddings(
25
  texts: Union[str, List[str]],
26
  use_instruction: bool = True,
27
- output_dimensions: int = 1024
28
  ) -> List[List[float]]:
29
  """
30
- Generate embeddings for text(s)
 
31
  Args:
32
  texts: Single string or list of strings
33
  use_instruction: Whether to prepend instruction prefix (recommended)
34
- output_dimensions: Output embedding size (32-1024)
 
35
  Returns:
36
- List of embedding vectors
37
  """
38
  # Handle single string
39
  if isinstance(texts, str):
40
  texts = [texts]
41
 
42
- # Add instruction prefix if enabled (Qwen3 is instruction-aware)
43
  if use_instruction:
44
  texts = [INSTRUCTION_PREFIX + text for text in texts]
45
 
@@ -52,10 +57,15 @@ def generate_embeddings(
52
  batch_size=32
53
  )
54
 
55
- # Resize embeddings if needed (MRL - Multilingual Representation Learning)
56
- if output_dimensions != 1024:
57
- # Qwen3 supports flexible dimensions (32-1024)
58
- # Simply truncate for smaller dimensions
 
 
 
 
 
59
  embeddings = embeddings[:, :output_dimensions]
60
 
61
  # Convert to list for JSON serialization
@@ -215,12 +225,16 @@ def calculate_similarity(text1: str, text2: str, use_instruction: bool) -> str:
215
  with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft()) as demo:
216
  gr.Markdown("""
217
  # 🎲 FairFate Embeddings API
218
- **Powered by Qwen3-Embedding-0.6B** - #1 Multilingual Embedding Model
 
 
219
  - 🌍 **100+ Languages** (English, Spanish, French, German, Chinese, Japanese, etc.)
220
- - πŸ“ **1024 Dimensions** (flexible 32-1024)
221
  - πŸ“š **32K Context** (massive text support)
222
  - ⚑ **Instruction-Aware** (optimized for RPG content)
223
- - πŸ† **#1 on MTEB** Multilingual Leaderboard
 
 
224
  Perfect for: Product classification, semantic search, recommendations, multilingual matching
225
  """)
226
 
@@ -239,8 +253,8 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
239
  )
240
  use_inst = gr.Checkbox(label="Use instruction prefix (recommended for RPG content)", value=True)
241
  output_dims = gr.Slider(
242
- minimum=32, maximum=1024, value=1024, step=32,
243
- label="Output Dimensions"
244
  )
245
  submit_btn = gr.Button("Generate Embeddings", variant="primary")
246
 
@@ -251,8 +265,8 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
251
 
252
  gr.Examples(
253
  examples=[
254
- ["D&D 5E epic fantasy adventure with dragons and dungeons", True, 1024],
255
- ["Cyberpunk shadowrun detective noir campaign\nPathfinder 2E beginner box starter set\nCall of Cthulhu horror investigation", True, 1024],
256
  ],
257
  inputs=[input_text, use_inst, output_dims],
258
  )
@@ -260,12 +274,14 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
260
  with gr.Tab("πŸ” Similarity Calculator"):
261
  gr.Markdown("""
262
  **Comprehensive Similarity Analysis** - Compare two texts using multiple metrics:
 
263
  - **Cosine Similarity**: Angle between vectors (best for semantic meaning)
264
  - **Jaccard Similarity**: Intersection over union (set-like comparison)
265
  - **Sørensen-Dice**: Weighted intersection (emphasizes shared features)
266
  - **Euclidean Distance/Similarity**: Straight-line distance in vector space
267
  - **Manhattan Distance**: Grid-based distance (L1 norm)
268
  - **Pearson Correlation**: Linear relationship between vectors
 
269
  Perfect for duplicate detection, classification testing, and understanding product relationships!
270
  """)
271
 
@@ -301,31 +317,41 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
301
  with gr.Tab("πŸ“– API Documentation"):
302
  gr.Markdown("""
303
  ## πŸš€ Quick Start
 
304
  ### Python
 
305
  ```python
306
  import requests
307
  import numpy as np
 
308
  url = "https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict"
 
309
  # Generate embeddings
310
  texts = [
311
  "Storm King's Thunder - Epic D&D 5E adventure",
312
  "Curse of Strahd - Gothic horror campaign"
313
  ]
 
314
  response = requests.post(
315
  url,
316
  json={
317
- "data": [texts, True, 1024], # [texts, use_instruction, dimensions]
318
  "fn_index": 0 # Index of generate_embeddings function
319
  }
320
  )
 
321
  result = response.json()
322
  embeddings = result["data"][0]
 
323
  print(f"Generated {len(embeddings)} embeddings")
324
- print(f"Dimensions: {len(embeddings[0])}")
325
  ```
 
326
  ### TypeScript/JavaScript
 
327
  ```typescript
328
  const url = 'https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict';
 
329
  const response = await fetch(url, {
330
  method: 'POST',
331
  headers: { 'Content-Type': 'application/json' },
@@ -333,50 +359,63 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
333
  data: [
334
  ["Your text here", "Another text"],
335
  true, // use_instruction
336
- 1024 // output_dimensions
337
  ],
338
  fn_index: 0
339
  })
340
  });
 
341
  const result = await response.json();
342
- const embeddings = result.data[0];
343
  ```
 
344
  ### cURL
 
345
  ```bash
346
  curl -X POST \\
347
  https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict \\
348
  -H "Content-Type: application/json" \\
349
  -d '{
350
- "data": [["Your text here"], true, 1024],
351
  "fn_index": 0
352
  }'
353
  ```
 
354
  ## πŸ“Š Parameters
 
355
  | Parameter | Type | Default | Description |
356
  |-----------|------|---------|-------------|
357
  | `texts` | string[] | required | Array of texts to embed |
358
  | `use_instruction` | boolean | true | Add instruction prefix (improves accuracy) |
359
- | `output_dimensions` | number | 1024 | Output size (32-1024) |
 
360
  ## 🎯 Use Cases
 
361
  - **Product Classification**: Auto-tag by genre, system, theme
362
  - **Semantic Search**: Find by meaning, not keywords
363
  - **Recommendations**: "Similar products"
364
  - **Duplicate Detection**: Find similar listings
365
  - **Multilingual Matching**: Cross-language similarity
 
366
  ## ⚑ Performance
 
367
  | Batch Size | GPU Throughput | CPU Throughput |
368
  |------------|----------------|----------------|
369
  | 1 | ~800/sec | ~80/sec |
370
  | 32 | ~4000/sec | ~250/sec |
 
371
  ## 🌍 Supported Languages
 
372
  English, Spanish, French, German, Italian, Portuguese, Russian, Polish, Dutch, Czech,
373
  Chinese, Japanese, Korean, Arabic, Hebrew, Hindi, Thai, Vietnamese, Indonesian,
374
  Turkish, Swedish, Norwegian, Danish, Finnish, Greek, Romanian, Hungarian, and 80+ more!
 
375
  ## πŸ“ Citation
 
376
  ```bibtex
377
- @misc{qwen3embedding2025,
378
- title={Qwen3 Embedding},
379
- author={Alibaba Cloud},
380
  year={2025},
381
  url={https://github.com/QwenLM/Qwen3-Embedding}
382
  }
@@ -386,26 +425,31 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
386
  with gr.Tab("ℹ️ Model Info"):
387
  gr.Markdown(f"""
388
  ## Model Details
 
389
  - **Model:** {MODEL_NAME}
390
  - **Dimensions:** {model.get_sentence_embedding_dimension()}
391
  - **Max Sequence Length:** {model.max_seq_length} tokens
392
  - **Languages:** 100+
393
  - **License:** Apache 2.0
394
  - **Normalization:** L2 normalized (ready for cosine similarity)
 
395
  ## Advantages
396
- βœ… **Best Multilingual Performance** - #1 on MTEB leaderboard
 
397
  βœ… **Massive Context** - 32K tokens (vs 512 for most models)
398
  βœ… **Instruction-Aware** - Can customize for specific domains
399
- βœ… **Flexible Dimensions** - 32 to 1024 dimensions
400
  βœ… **Code-Switching** - Handles mixed-language text
 
 
401
  ## Resources
402
- - [Model Card](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B)
403
- - [GitHub](https://github.com/QwenLM/Qwen3-Embedding)
404
- - [Blog Post](https://qwenlm.github.io/blog/qwen3-embedding/)
 
405
  - [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)
406
  """)
407
 
408
  # Launch with API enabled
409
  if __name__ == "__main__":
410
  demo.launch()
411
-
 
1
  """
2
+ FairFate Embeddings API - Qwen3-Embedding-4B
3
  Multilingual semantic embeddings for tabletop RPG product classification
4
  """
5
 
 
10
  import spaces # ZeroGPU decorator
11
 
12
  # Load model once at startup
13
+ # Using Qwen3-Embedding-4B for 2560 native dimensions (truncate to 1536 for production)
14
+ # Qwen3-4B is optimal for 1536 dims: 60% retention (vs 42.9% for GTE-Qwen2-7B)
15
+ MODEL_NAME = "Qwen/Qwen3-Embedding-4B"
16
  print(f"πŸ”„ Loading model: {MODEL_NAME}")
17
  model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
18
  print(f"βœ… Model loaded successfully")
19
+ print(f" Native Dimensions: {model.get_sentence_embedding_dimension()}")
20
  print(f" Max Seq Length: {model.max_seq_length}")
21
+ print(f" Matryoshka Support: Yes (truncate to any dimension ≀ {model.get_sentence_embedding_dimension()})")
22
 
23
  # Optional: Add instruction prefix for RPG domain (improves accuracy by 1-5%)
24
  INSTRUCTION_PREFIX = "Represent this tabletop RPG product for semantic search: "
 
27
  def generate_embeddings(
28
  texts: Union[str, List[str]],
29
  use_instruction: bool = True,
30
+ output_dimensions: int = 1536
31
  ) -> List[List[float]]:
32
  """
33
+ Generate embeddings for text(s) with matryoshka truncation
34
+
35
  Args:
36
  texts: Single string or list of strings
37
  use_instruction: Whether to prepend instruction prefix (recommended)
38
+ output_dimensions: Output embedding size (32-2560, default 1536 for production)
39
+
40
  Returns:
41
+ List of embedding vectors (L2 normalized)
42
  """
43
  # Handle single string
44
  if isinstance(texts, str):
45
  texts = [texts]
46
 
47
+ # Add instruction prefix if enabled (Qwen3-Embedding models are instruction-aware)
48
  if use_instruction:
49
  texts = [INSTRUCTION_PREFIX + text for text in texts]
50
 
 
57
  batch_size=32
58
  )
59
 
60
+ # Get native dimensions
61
+ native_dims = model.get_sentence_embedding_dimension()
62
+
63
+ # Matryoshka truncation: Simply take first N dimensions
64
+ # Qwen3-Embedding models support truncation to any dimension ≀ native_dims
65
+ if output_dimensions != native_dims:
66
+ if output_dimensions > native_dims:
67
+ print(f"⚠️ Warning: Requested {output_dimensions} dims but model has {native_dims}. Using {native_dims}.")
68
+ output_dimensions = native_dims
69
  embeddings = embeddings[:, :output_dimensions]
70
 
71
  # Convert to list for JSON serialization
 
225
  with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft()) as demo:
226
  gr.Markdown("""
227
  # 🎲 FairFate Embeddings API
228
+
229
+ **Powered by Qwen3-Embedding-4B** - Advanced Multilingual Embedding Model
230
+
231
  - 🌍 **100+ Languages** (English, Spanish, French, German, Chinese, Japanese, etc.)
232
+ - πŸ“ **2560 Native Dimensions** (matryoshka truncation to 1536 for production)
233
  - πŸ“š **32K Context** (massive text support)
234
  - ⚑ **Instruction-Aware** (optimized for RPG content)
235
+ - πŸ”¬ **Matryoshka Support** (flexible 32-2560 dimensions)
236
+ - πŸ† **Optimal for 1536 dims** (60% dimension retention)
237
+
238
  Perfect for: Product classification, semantic search, recommendations, multilingual matching
239
  """)
240
 
 
253
  )
254
  use_inst = gr.Checkbox(label="Use instruction prefix (recommended for RPG content)", value=True)
255
  output_dims = gr.Slider(
256
+ minimum=32, maximum=2560, value=1536, step=32,
257
+ label="Output Dimensions (Production: 1536)"
258
  )
259
  submit_btn = gr.Button("Generate Embeddings", variant="primary")
260
 
 
265
 
266
  gr.Examples(
267
  examples=[
268
+ ["D&D 5E epic fantasy adventure with dragons and dungeons", True, 1536],
269
+ ["Cyberpunk shadowrun detective noir campaign\nPathfinder 2E beginner box starter set\nCall of Cthulhu horror investigation", True, 1536],
270
  ],
271
  inputs=[input_text, use_inst, output_dims],
272
  )
 
274
  with gr.Tab("πŸ” Similarity Calculator"):
275
  gr.Markdown("""
276
  **Comprehensive Similarity Analysis** - Compare two texts using multiple metrics:
277
+
278
  - **Cosine Similarity**: Angle between vectors (best for semantic meaning)
279
  - **Jaccard Similarity**: Intersection over union (set-like comparison)
280
  - **Sørensen-Dice**: Weighted intersection (emphasizes shared features)
281
  - **Euclidean Distance/Similarity**: Straight-line distance in vector space
282
  - **Manhattan Distance**: Grid-based distance (L1 norm)
283
  - **Pearson Correlation**: Linear relationship between vectors
284
+
285
  Perfect for duplicate detection, classification testing, and understanding product relationships!
286
  """)
287
 
 
317
  with gr.Tab("πŸ“– API Documentation"):
318
  gr.Markdown("""
319
  ## πŸš€ Quick Start
320
+
321
  ### Python
322
+
323
  ```python
324
  import requests
325
  import numpy as np
326
+
327
  url = "https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict"
328
+
329
  # Generate embeddings
330
  texts = [
331
  "Storm King's Thunder - Epic D&D 5E adventure",
332
  "Curse of Strahd - Gothic horror campaign"
333
  ]
334
+
335
  response = requests.post(
336
  url,
337
  json={
338
+ "data": [texts, True, 1536], # [texts, use_instruction, dimensions]
339
  "fn_index": 0 # Index of generate_embeddings function
340
  }
341
  )
342
+
343
  result = response.json()
344
  embeddings = result["data"][0]
345
+
346
  print(f"Generated {len(embeddings)} embeddings")
347
+ print(f"Dimensions: {len(embeddings[0])}") # Should output 1536
348
  ```
349
+
350
  ### TypeScript/JavaScript
351
+
352
  ```typescript
353
  const url = 'https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict';
354
+
355
  const response = await fetch(url, {
356
  method: 'POST',
357
  headers: { 'Content-Type': 'application/json' },
 
359
  data: [
360
  ["Your text here", "Another text"],
361
  true, // use_instruction
362
+ 1536 // output_dimensions (production default)
363
  ],
364
  fn_index: 0
365
  })
366
  });
367
+
368
  const result = await response.json();
369
+ const embeddings = result.data[0]; // Array of 1536-dim vectors
370
  ```
371
+
372
  ### cURL
373
+
374
  ```bash
375
  curl -X POST \\
376
  https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict \\
377
  -H "Content-Type: application/json" \\
378
  -d '{
379
+ "data": [["Your text here"], true, 1536],
380
  "fn_index": 0
381
  }'
382
  ```
383
+
384
  ## πŸ“Š Parameters
385
+
386
  | Parameter | Type | Default | Description |
387
  |-----------|------|---------|-------------|
388
  | `texts` | string[] | required | Array of texts to embed |
389
  | `use_instruction` | boolean | true | Add instruction prefix (improves accuracy) |
390
+ | `output_dimensions` | number | 1536 | Output size (32-2560, production default: 1536) |
391
+
392
  ## 🎯 Use Cases
393
+
394
  - **Product Classification**: Auto-tag by genre, system, theme
395
  - **Semantic Search**: Find by meaning, not keywords
396
  - **Recommendations**: "Similar products"
397
  - **Duplicate Detection**: Find similar listings
398
  - **Multilingual Matching**: Cross-language similarity
399
+
400
  ## ⚑ Performance
401
+
402
  | Batch Size | GPU Throughput | CPU Throughput |
403
  |------------|----------------|----------------|
404
  | 1 | ~800/sec | ~80/sec |
405
  | 32 | ~4000/sec | ~250/sec |
406
+
407
  ## 🌍 Supported Languages
408
+
409
  English, Spanish, French, German, Italian, Portuguese, Russian, Polish, Dutch, Czech,
410
  Chinese, Japanese, Korean, Arabic, Hebrew, Hindi, Thai, Vietnamese, Indonesian,
411
  Turkish, Swedish, Norwegian, Danish, Finnish, Greek, Romanian, Hungarian, and 80+ more!
412
+
413
  ## πŸ“ Citation
414
+
415
  ```bibtex
416
+ @misc{qwen3-embedding-2025,
417
+ title={Qwen3-Embedding: Multilingual Text Embedding Models},
418
+ author={Qwen Team, Alibaba Cloud},
419
  year={2025},
420
  url={https://github.com/QwenLM/Qwen3-Embedding}
421
  }
 
425
  with gr.Tab("ℹ️ Model Info"):
426
  gr.Markdown(f"""
427
  ## Model Details
428
+
429
  - **Model:** {MODEL_NAME}
430
  - **Dimensions:** {model.get_sentence_embedding_dimension()}
431
  - **Max Sequence Length:** {model.max_seq_length} tokens
432
  - **Languages:** 100+
433
  - **License:** Apache 2.0
434
  - **Normalization:** L2 normalized (ready for cosine similarity)
435
+
436
  ## Advantages
437
+
438
+ βœ… **Best Multilingual Performance** - Top tier on MTEB leaderboard
439
  βœ… **Massive Context** - 32K tokens (vs 512 for most models)
440
  βœ… **Instruction-Aware** - Can customize for specific domains
441
+ βœ… **Flexible Dimensions** - 32 to 2560 dimensions (matryoshka truncation)
442
  βœ… **Code-Switching** - Handles mixed-language text
443
+ βœ… **Production Optimized** - 60% retention at 1536 dims (best in class)
444
+
445
  ## Resources
446
+
447
+ - [Model Card](https://huggingface.co/Qwen/Qwen3-Embedding-4B)
448
+ - [Qwen3-Embedding GitHub](https://github.com/QwenLM/Qwen3-Embedding)
449
+ - [Qwen Blog](https://qwenlm.github.io/)
450
  - [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)
451
  """)
452
 
453
  # Launch with API enabled
454
  if __name__ == "__main__":
455
  demo.launch()