Spaces:
Sleeping
Sleeping
File size: 14,548 Bytes
5d8e337 ce958e0 5d8e337 2ec6494 d49b11e 34addd4 5d8e337 34addd4 d49b11e 5d8e337 d49b11e 5d8e337 2ec6494 5d8e337 d49b11e 5d8e337 2ec6494 d49b11e 2ec6494 5d8e337 ce958e0 d49b11e 2ec6494 d49b11e 2ec6494 5d8e337 d49b11e 5d8e337 2ec6494 5d8e337 2ec6494 5d8e337 d49b11e ce958e0 d49b11e 5d8e337 2ec6494 ce958e0 2ec6494 ce958e0 5d8e337 2ec6494 5d8e337 2ec6494 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 5d8e337 ce958e0 2ec6494 ce958e0 2ec6494 ce958e0 2ec6494 34addd4 d49b11e 34addd4 d49b11e 5d8e337 2ec6494 5d8e337 ce958e0 5d8e337 c0d22de 5d8e337 d49b11e 5d8e337 ce958e0 5d8e337 ada0395 5d8e337 d49b11e 5d8e337 34addd4 5d8e337 d49b11e 5d8e337 d49b11e 5d8e337 d3df828 5d8e337 d3df828 5d8e337 ce958e0 5d8e337 ada0395 5d8e337 2ec6494 5d8e337 34addd4 2ec6494 34addd4 d49b11e 5d8e337 d49b11e 2ec6494 5d8e337 d49b11e ce958e0 d49b11e 5d8e337 d49b11e 2ec6494 5d8e337 d49b11e 5d8e337 2ec6494 d49b11e 5d8e337 d49b11e 5d8e337 d49b11e 2ec6494 d49b11e 5d8e337 d49b11e 2ec6494 ce958e0 d49b11e 5d8e337 d49b11e 5d8e337 d49b11e 2ec6494 d49b11e 2ec6494 d49b11e 5d8e337 d49b11e 5d8e337 ce958e0 5d8e337 d49b11e 5d8e337 d49b11e 34addd4 d49b11e 5d8e337 d49b11e 34addd4 d49b11e 5d8e337 d49b11e 34addd4 ce958e0 34addd4 d49b11e 5d8e337 d49b11e 34addd4 d49b11e 5d8e337 d49b11e 5d8e337 2ec6494 5d8e337 34addd4 5d8e337 d49b11e 34addd4 d49b11e 5d8e337 d49b11e 34addd4 d49b11e 5d8e337 d49b11e 5d8e337 2ec6494 5d8e337 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 |
"""
FairFate Embeddings API - Qwen3-Embedding-4B
Multilingual semantic embeddings for tabletop RPG product classification
"""
import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np
from typing import List, Union
import spaces # ZeroGPU decorator
# Load model once at startup
# Using Qwen3-Embedding-4B for 2560 native dimensions (truncate to 1536 for production)
# Qwen3-4B is optimal for 1536 dims: 60% retention (vs 42.9% for GTE-Qwen2-7B)
MODEL_NAME = "Qwen/Qwen3-Embedding-4B"
print(f" Loading model: {MODEL_NAME}")
model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
print(f" Model loaded successfully")
print(f" Native Dimensions: {model.get_sentence_embedding_dimension()}")
print(f" Max Seq Length: {model.max_seq_length}")
print(f" Matryoshka Support: Yes (truncate to any dimension ≤ {model.get_sentence_embedding_dimension()})")
# Optional: Add instruction prefix for RPG domain (improves accuracy by 1-5%)
INSTRUCTION_PREFIX = "Represent this tabletop RPG product for semantic search: "
@spaces.GPU(duration=60) # ZeroGPU: allocate GPU for 60 seconds
def generate_embeddings(
texts: Union[str, List[str]],
use_instruction: bool = True,
output_dimensions: int = 1536
) -> List[List[float]]:
"""
Generate embeddings for text(s) with matryoshka truncation
Args:
texts: Single string or list of strings
use_instruction: Whether to prepend instruction prefix (recommended)
output_dimensions: Output embedding size (32-2560, default 1536 for production)
Returns:
List of embedding vectors (L2 normalized)
"""
# Handle single string
if isinstance(texts, str):
texts = [texts]
# Add instruction prefix if enabled (Qwen3-Embedding models are instruction-aware)
if use_instruction:
texts = [INSTRUCTION_PREFIX + text for text in texts]
# Generate embeddings
embeddings = model.encode(
texts,
convert_to_numpy=True,
normalize_embeddings=True, # L2 normalize for cosine similarity
show_progress_bar=False,
batch_size=32
)
# Get native dimensions
native_dims = model.get_sentence_embedding_dimension()
# Matryoshka truncation: Simply take first N dimensions
# Qwen3-Embedding models support truncation to any dimension ≤ native_dims
if output_dimensions != native_dims:
if output_dimensions > native_dims:
print(f"⚠️ Warning: Requested {output_dimensions} dims but model has {native_dims}. Using {native_dims}.")
output_dimensions = native_dims
embeddings = embeddings[:, :output_dimensions]
# Convert to list for JSON serialization
return embeddings.tolist()
def batch_generate(texts_input: str, use_instruction: bool, output_dims: int):
"""
Gradio interface for batch embedding generation
Expects newline-separated texts
"""
if not texts_input.strip():
return {"error": "Please provide at least one text"}
texts = [t.strip() for t in texts_input.split('\n') if t.strip()]
try:
embeddings = generate_embeddings(texts, use_instruction, output_dims)
return embeddings
except Exception as e:
return {"error": str(e)}
def calculate_similarity(text1: str, text2: str, use_instruction: bool) -> dict:
"""
Calculate comprehensive similarity metrics between two texts
Returns dict with all similarity metrics
"""
if not text1.strip() or not text2.strip():
return {"error": "Please provide both texts"}
try:
embeddings = generate_embeddings([text1, text2], use_instruction)
# Calculate all similarity metrics
emb1 = np.array(embeddings[0])
emb2 = np.array(embeddings[1])
# Cosine Similarity (for normalized vectors, just dot product)
cosine = float(np.dot(emb1, emb2))
# Euclidean Distance
euclidean_dist = float(np.linalg.norm(emb1 - emb2))
euclidean_sim = 1 / (1 + euclidean_dist)
# Jaccard Similarity (min/max interpretation for continuous vectors)
intersection = np.sum(np.minimum(np.abs(emb1), np.abs(emb2)))
union = np.sum(np.maximum(np.abs(emb1), np.abs(emb2)))
jaccard = float(intersection / union if union > 0 else 0)
# Sorensen-Dice Coefficient
intersection = np.sum(np.minimum(np.abs(emb1), np.abs(emb2)))
sum_magnitudes = np.sum(np.abs(emb1)) + np.sum(np.abs(emb2))
sorensen_dice = float(2 * intersection / sum_magnitudes if sum_magnitudes > 0 else 0)
# Manhattan Distance
manhattan = float(np.sum(np.abs(emb1 - emb2)))
# Pearson Correlation
pearson = float(np.corrcoef(emb1, emb2)[0, 1])
return {
'cosine': cosine,
'euclidean_distance': euclidean_dist,
'euclidean_similarity': euclidean_sim,
'jaccard': jaccard,
'sorensen_dice': sorensen_dice,
'manhattan': manhattan,
'pearson': pearson
}
except Exception as e:
return {"error": str(e)}
# Create Gradio interface
with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
# FairFate Embeddings API
**Powered by Qwen3-Embedding-4B** - Advanced Multilingual Embedding Model
- **100+ Languages** (English, Spanish, French, German, Chinese, Japanese, etc.)
- **2560 Native Dimensions** (matryoshka truncation to 1536 for production)
- **32K Context** (massive text support)
- **Instruction-Aware** (optimized for RPG content)
- **Matryoshka Support** (flexible 32-2560 dimensions)
- **Optimal for 1536 dims** (60% dimension retention)
Perfect for: Product classification, semantic search, recommendations, multilingual matching
""")
with gr.Tab("🔮 Generate Embeddings"):
gr.Markdown("""
Generate semantic embeddings for product descriptions, titles, or any text.
Enter one text per line for batch processing.
""")
with gr.Row():
with gr.Column():
input_text = gr.Textbox(
label="Input Texts (one per line)",
placeholder="Example:\nStorm King's Thunder - Epic D&D 5E adventure\nCurse of Strahd - Gothic horror campaign\nPathfinder 2E Core Rulebook",
lines=8
)
use_inst = gr.Checkbox(label="Use instruction prefix (recommended for RPG content)", value=True)
output_dims = gr.Slider(
minimum=32, maximum=2560, value=1536, step=32,
label="Output Dimensions (Production: 1536)"
)
submit_btn = gr.Button("Generate Embeddings", variant="primary")
with gr.Column():
output_json = gr.JSON(label="Results")
# Register as API endpoint AND UI button
# api_name parameter makes this callable via @gradio/client as "/batch_generate"
submit_btn.click(
batch_generate,
inputs=[input_text, use_inst, output_dims],
outputs=output_json,
api_name="batch_generate" # Expose as /batch_generate API endpoint
)
gr.Examples(
examples=[
["D&D 5E epic fantasy adventure with dragons and dungeons", True, 1536],
["Cyberpunk shadowrun detective noir campaign\nPathfinder 2E beginner box starter set\nCall of Cthulhu horror investigation", True, 1536],
],
inputs=[input_text, use_inst, output_dims],
)
with gr.Tab("Similarity Calculator"):
gr.Markdown("""
**Comprehensive Similarity Analysis** - Compare two texts using multiple metrics:
- **Cosine Similarity**: Angle between vectors (best for semantic meaning)
- **Jaccard Similarity**: Intersection over union (set-like comparison)
- **Sørensen-Dice**: Weighted intersection (emphasizes shared features)
- **Euclidean Distance/Similarity**: Straight-line distance in vector space
- **Manhattan Distance**: Grid-based distance (L1 norm)
- **Pearson Correlation**: Linear relationship between vectors
Perfect for duplicate detection, classification testing, and understanding product relationships!
""")
with gr.Row():
with gr.Column():
text1 = gr.Textbox(
label="First Text",
placeholder="Storm King's Thunder - Giant-themed D&D adventure",
lines=3
)
text2 = gr.Textbox(
label="Second Text",
placeholder="Princes of the Apocalypse - Elemental evil campaign",
lines=3
)
use_inst_sim = gr.Checkbox(label="Use instruction prefix", value=True)
calc_btn = gr.Button("Calculate Similarity", variant="primary")
with gr.Column():
similarity_output = gr.JSON(label="Similarity Result")
# Register as API endpoint AND UI button
calc_btn.click(
calculate_similarity,
inputs=[text1, text2, use_inst_sim],
outputs=similarity_output,
api_name="calculate_similarity" # Expose as /calculate_similarity API endpoint
)
gr.Examples(
examples=[
["D&D 5E fantasy adventure", "Dungeons and Dragons fifth edition module", True],
["Horror investigation mystery", "Comedy fantasy lighthearted fun", True],
["Pathfinder 2E rulebook", "D&D 5E Player's Handbook", True],
],
inputs=[text1, text2, use_inst_sim],
)
with gr.Tab("API Documentation"):
gr.Markdown("""
## Quick Start
### Python
```python
import requests
import numpy as np
url = "https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict"
# Generate embeddings
texts = [
"Storm King's Thunder - Epic D&D 5E adventure",
"Curse of Strahd - Gothic horror campaign"
]
response = requests.post(
url,
json={
"data": [texts, True, 1536], # [texts, use_instruction, dimensions]
"fn_index": 0 # Index of generate_embeddings function
}
)
result = response.json()
embeddings = result["data"][0]
print(f"Generated {len(embeddings)} embeddings")
print(f"Dimensions: {len(embeddings[0])}") # Should output 1536
```
### TypeScript/JavaScript
```typescript
const url = 'https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict';
const response = await fetch(url, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
data: [
["Your text here", "Another text"],
true, // use_instruction
1536 // output_dimensions (production default)
],
fn_index: 0
})
});
const result = await response.json();
const embeddings = result.data[0]; // Array of 1536-dim vectors
```
### cURL
```bash
curl -X POST \\
https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict \\
-H "Content-Type: application/json" \\
-d '{
"data": [["Your text here"], true, 1536],
"fn_index": 0
}'
```
## Parameters
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `texts` | string[] | required | Array of texts to embed |
| `use_instruction` | boolean | true | Add instruction prefix (improves accuracy) |
| `output_dimensions` | number | 1536 | Output size (32-3584, production default: 1536) |
## Use Cases
- **Product Classification**: Auto-tag by genre, system, theme
- **Semantic Search**: Find by meaning, not keywords
- **Recommendations**: "Similar products"
- **Duplicate Detection**: Find similar listings
- **Multilingual Matching**: Cross-language similarity
## Performance
| Batch Size | GPU Throughput | CPU Throughput |
|------------|----------------|----------------|
| 1 | ~800/sec | ~80/sec |
| 32 | ~4000/sec | ~250/sec |
## Supported Languages
English, Spanish, French, German, Italian, Portuguese, Russian, Polish, Dutch, Czech,
Chinese, Japanese, Korean, Arabic, Hebrew, Hindi, Thai, Vietnamese, Indonesian,
Turkish, Swedish, Norwegian, Danish, Finnish, Greek, Romanian, Hungarian, and 80+ more!
## Citation
```bibtex
@misc{qwen3-embedding-2025,
title={Qwen3-Embedding: Multilingual Text Embedding Models},
author={Qwen Team, Alibaba Cloud},
year={2025},
url={https://github.com/QwenLM/Qwen3-Embedding}
}
```
""")
with gr.Tab("Model Info"):
gr.Markdown(f"""
## Model Details
- **Model:** {MODEL_NAME}
- **Dimensions:** {model.get_sentence_embedding_dimension()}
- **Max Sequence Length:** {model.max_seq_length} tokens
- **Languages:** 100+
- **License:** Apache 2.0
- **Normalization:** L2 normalized (ready for cosine similarity)
## Advantages
**Best Multilingual Performance** - Top tier on MTEB leaderboard
**Massive Context** - 32K tokens (vs 512 for most models)
**Instruction-Aware** - Can customize for specific domains
**Flexible Dimensions** - 32 to 2560 dimensions (matryoshka truncation)
**Code-Switching** - Handles mixed-language text
**Production Optimized** - 60% retention at 1536 dims (best in class)
## Resources
- [Model Card](https://huggingface.co/Qwen/Qwen3-Embedding-4B)
- [Qwen3-Embedding GitHub](https://github.com/QwenLM/Qwen3-Embedding)
- [Qwen Blog](https://qwenlm.github.io/)
- [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)
""")
# Launch with API enabled
if __name__ == "__main__":
demo.launch()
|