TiniThingsInc committed on
Commit
d49b11e
·
verified Β·
1 Parent(s): 2114026
Files changed (1) hide show
  1. app.py +78 -34
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- FairFate Embeddings API - Qwen3-Embedding-0.6B
3
  Multilingual semantic embeddings for tabletop RPG product classification
4
  """
5
 
@@ -10,12 +10,15 @@ from typing import List, Union
10
  import spaces # ZeroGPU decorator
11
 
12
  # Load model once at startup
13
- MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"
 
 
14
  print(f"πŸ”„ Loading model: {MODEL_NAME}")
15
  model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
16
  print(f"βœ… Model loaded successfully")
17
- print(f" Dimensions: {model.get_sentence_embedding_dimension()}")
18
  print(f" Max Seq Length: {model.max_seq_length}")
 
19
 
20
  # Optional: Add instruction prefix for RPG domain (improves accuracy by 1-5%)
21
  INSTRUCTION_PREFIX = "Represent this tabletop RPG product for semantic search: "
@@ -24,22 +27,24 @@ INSTRUCTION_PREFIX = "Represent this tabletop RPG product for semantic search: "
24
  def generate_embeddings(
25
  texts: Union[str, List[str]],
26
  use_instruction: bool = True,
27
- output_dimensions: int = 1024
28
  ) -> List[List[float]]:
29
  """
30
- Generate embeddings for text(s)
 
31
  Args:
32
  texts: Single string or list of strings
33
  use_instruction: Whether to prepend instruction prefix (recommended)
34
- output_dimensions: Output embedding size (32-1024)
 
35
  Returns:
36
- List of embedding vectors
37
  """
38
  # Handle single string
39
  if isinstance(texts, str):
40
  texts = [texts]
41
 
42
- # Add instruction prefix if enabled (Qwen3 is instruction-aware)
43
  if use_instruction:
44
  texts = [INSTRUCTION_PREFIX + text for text in texts]
45
 
@@ -52,10 +57,15 @@ def generate_embeddings(
52
  batch_size=32
53
  )
54
 
55
- # Resize embeddings if needed (MRL - Multilingual Representation Learning)
56
- if output_dimensions != 1024:
57
- # Qwen3 supports flexible dimensions (32-1024)
58
- # Simply truncate for smaller dimensions
 
 
 
 
 
59
  embeddings = embeddings[:, :output_dimensions]
60
 
61
  # Convert to list for JSON serialization
@@ -215,12 +225,16 @@ def calculate_similarity(text1: str, text2: str, use_instruction: bool) -> str:
215
  with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft()) as demo:
216
  gr.Markdown("""
217
  # 🎲 FairFate Embeddings API
218
- **Powered by Qwen3-Embedding-0.6B** - #1 Multilingual Embedding Model
 
 
219
  - 🌍 **100+ Languages** (English, Spanish, French, German, Chinese, Japanese, etc.)
220
- - πŸ“ **1024 Dimensions** (flexible 32-1024)
221
  - πŸ“š **32K Context** (massive text support)
222
  - ⚑ **Instruction-Aware** (optimized for RPG content)
223
- - πŸ† **#1 on MTEB** Multilingual Leaderboard
 
 
224
  Perfect for: Product classification, semantic search, recommendations, multilingual matching
225
  """)
226
 
@@ -239,8 +253,8 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
239
  )
240
  use_inst = gr.Checkbox(label="Use instruction prefix (recommended for RPG content)", value=True)
241
  output_dims = gr.Slider(
242
- minimum=32, maximum=1024, value=1024, step=32,
243
- label="Output Dimensions"
244
  )
245
  submit_btn = gr.Button("Generate Embeddings", variant="primary")
246
 
@@ -251,8 +265,8 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
251
 
252
  gr.Examples(
253
  examples=[
254
- ["D&D 5E epic fantasy adventure with dragons and dungeons", True, 1024],
255
- ["Cyberpunk shadowrun detective noir campaign\nPathfinder 2E beginner box starter set\nCall of Cthulhu horror investigation", True, 1024],
256
  ],
257
  inputs=[input_text, use_inst, output_dims],
258
  )
@@ -260,12 +274,14 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
260
  with gr.Tab("πŸ” Similarity Calculator"):
261
  gr.Markdown("""
262
  **Comprehensive Similarity Analysis** - Compare two texts using multiple metrics:
 
263
  - **Cosine Similarity**: Angle between vectors (best for semantic meaning)
264
  - **Jaccard Similarity**: Intersection over union (set-like comparison)
265
  - **Sørensen-Dice**: Weighted intersection (emphasizes shared features)
266
  - **Euclidean Distance/Similarity**: Straight-line distance in vector space
267
  - **Manhattan Distance**: Grid-based distance (L1 norm)
268
  - **Pearson Correlation**: Linear relationship between vectors
 
269
  Perfect for duplicate detection, classification testing, and understanding product relationships!
270
  """)
271
 
@@ -301,31 +317,41 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
301
  with gr.Tab("πŸ“– API Documentation"):
302
  gr.Markdown("""
303
  ## πŸš€ Quick Start
 
304
  ### Python
 
305
  ```python
306
  import requests
307
  import numpy as np
 
308
  url = "https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict"
 
309
  # Generate embeddings
310
  texts = [
311
  "Storm King's Thunder - Epic D&D 5E adventure",
312
  "Curse of Strahd - Gothic horror campaign"
313
  ]
 
314
  response = requests.post(
315
  url,
316
  json={
317
- "data": [texts, True, 1024], # [texts, use_instruction, dimensions]
318
  "fn_index": 0 # Index of generate_embeddings function
319
  }
320
  )
 
321
  result = response.json()
322
  embeddings = result["data"][0]
 
323
  print(f"Generated {len(embeddings)} embeddings")
324
- print(f"Dimensions: {len(embeddings[0])}")
325
  ```
 
326
  ### TypeScript/JavaScript
 
327
  ```typescript
328
  const url = 'https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict';
 
329
  const response = await fetch(url, {
330
  method: 'POST',
331
  headers: { 'Content-Type': 'application/json' },
@@ -333,50 +359,63 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
333
  data: [
334
  ["Your text here", "Another text"],
335
  true, // use_instruction
336
- 1024 // output_dimensions
337
  ],
338
  fn_index: 0
339
  })
340
  });
 
341
  const result = await response.json();
342
- const embeddings = result.data[0];
343
  ```
 
344
  ### cURL
 
345
  ```bash
346
  curl -X POST \\
347
  https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict \\
348
  -H "Content-Type: application/json" \\
349
  -d '{
350
- "data": [["Your text here"], true, 1024],
351
  "fn_index": 0
352
  }'
353
  ```
 
354
  ## πŸ“Š Parameters
 
355
  | Parameter | Type | Default | Description |
356
  |-----------|------|---------|-------------|
357
  | `texts` | string[] | required | Array of texts to embed |
358
  | `use_instruction` | boolean | true | Add instruction prefix (improves accuracy) |
359
- | `output_dimensions` | number | 1024 | Output size (32-1024) |
 
360
  ## 🎯 Use Cases
 
361
  - **Product Classification**: Auto-tag by genre, system, theme
362
  - **Semantic Search**: Find by meaning, not keywords
363
  - **Recommendations**: "Similar products"
364
  - **Duplicate Detection**: Find similar listings
365
  - **Multilingual Matching**: Cross-language similarity
 
366
  ## ⚑ Performance
 
367
  | Batch Size | GPU Throughput | CPU Throughput |
368
  |------------|----------------|----------------|
369
  | 1 | ~800/sec | ~80/sec |
370
  | 32 | ~4000/sec | ~250/sec |
 
371
  ## 🌍 Supported Languages
 
372
  English, Spanish, French, German, Italian, Portuguese, Russian, Polish, Dutch, Czech,
373
  Chinese, Japanese, Korean, Arabic, Hebrew, Hindi, Thai, Vietnamese, Indonesian,
374
  Turkish, Swedish, Norwegian, Danish, Finnish, Greek, Romanian, Hungarian, and 80+ more!
 
375
  ## πŸ“ Citation
 
376
  ```bibtex
377
- @misc{qwen3embedding2025,
378
- title={Qwen3 Embedding},
379
- author={Alibaba Cloud},
380
  year={2025},
381
  url={https://github.com/QwenLM/Qwen3-Embedding}
382
  }
@@ -386,26 +425,31 @@ with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft())
386
  with gr.Tab("ℹ️ Model Info"):
387
  gr.Markdown(f"""
388
  ## Model Details
 
389
  - **Model:** {MODEL_NAME}
390
  - **Dimensions:** {model.get_sentence_embedding_dimension()}
391
  - **Max Sequence Length:** {model.max_seq_length} tokens
392
  - **Languages:** 100+
393
  - **License:** Apache 2.0
394
  - **Normalization:** L2 normalized (ready for cosine similarity)
 
395
  ## Advantages
396
- βœ… **Best Multilingual Performance** - #1 on MTEB leaderboard
 
397
  βœ… **Massive Context** - 32K tokens (vs 512 for most models)
398
  βœ… **Instruction-Aware** - Can customize for specific domains
399
- βœ… **Flexible Dimensions** - 32 to 1024 dimensions
400
  βœ… **Code-Switching** - Handles mixed-language text
 
 
401
  ## Resources
402
- - [Model Card](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B)
403
- - [GitHub](https://github.com/QwenLM/Qwen3-Embedding)
404
- - [Blog Post](https://qwenlm.github.io/blog/qwen3-embedding/)
 
405
  - [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)
406
  """)
407
 
408
  # Launch with API enabled
409
  if __name__ == "__main__":
410
  demo.launch()
411
-
 
1
  """
2
+ FairFate Embeddings API - Qwen3-Embedding-4B
3
  Multilingual semantic embeddings for tabletop RPG product classification
4
  """
5
 
 
10
  import spaces # ZeroGPU decorator
11
 
12
  # Load model once at startup
13
+ # Using Qwen3-Embedding-4B for 2560 native dimensions (truncate to 1536 for production)
14
+ # Qwen3-4B is optimal for 1536 dims: 60% retention (vs 42.9% for GTE-Qwen2-7B)
15
+ MODEL_NAME = "Qwen/Qwen3-Embedding-4B"
16
  print(f"πŸ”„ Loading model: {MODEL_NAME}")
17
  model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
18
  print(f"βœ… Model loaded successfully")
19
+ print(f" Native Dimensions: {model.get_sentence_embedding_dimension()}")
20
  print(f" Max Seq Length: {model.max_seq_length}")
21
+ print(f" Matryoshka Support: Yes (truncate to any dimension ≀ {model.get_sentence_embedding_dimension()})")
22
 
23
  # Optional: Add instruction prefix for RPG domain (improves accuracy by 1-5%)
24
  INSTRUCTION_PREFIX = "Represent this tabletop RPG product for semantic search: "
 
27
  def generate_embeddings(
28
  texts: Union[str, List[str]],
29
  use_instruction: bool = True,
30
+ output_dimensions: int = 1536
31
  ) -> List[List[float]]:
32
  """
33
+ Generate embeddings for text(s) with matryoshka truncation
34
+
35
  Args:
36
  texts: Single string or list of strings
37
  use_instruction: Whether to prepend instruction prefix (recommended)
38
+ output_dimensions: Output embedding size (32-2560, default 1536 for production)
39
+
40
  Returns:
41
+ List of embedding vectors (L2 normalized)
42
  """
43
  # Handle single string
44
  if isinstance(texts, str):
45
  texts = [texts]
46
 
47
+ # Add instruction prefix if enabled (Qwen3-Embedding models are instruction-aware)
48
  if use_instruction:
49
  texts = [INSTRUCTION_PREFIX + text for text in texts]
50
 
 
57
  batch_size=32
58
  )
59
 
60
+ # Get native dimensions
61
+ native_dims = model.get_sentence_embedding_dimension()
62
+
63
+ # Matryoshka truncation: Simply take first N dimensions
64
+ # Qwen3-Embedding models support truncation to any dimension ≀ native_dims
65
+ if output_dimensions != native_dims:
66
+ if output_dimensions > native_dims:
67
+ print(f"⚠️ Warning: Requested {output_dimensions} dims but model has {native_dims}. Using {native_dims}.")
68
+ output_dimensions = native_dims
69
  embeddings = embeddings[:, :output_dimensions]
70
 
71
  # Convert to list for JSON serialization
 
225
  with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft()) as demo:
226
  gr.Markdown("""
227
  # 🎲 FairFate Embeddings API
228
+
229
+ **Powered by Qwen3-Embedding-4B** - Advanced Multilingual Embedding Model
230
+
231
  - 🌍 **100+ Languages** (English, Spanish, French, German, Chinese, Japanese, etc.)
232
+ - πŸ“ **2560 Native Dimensions** (matryoshka truncation to 1536 for production)
233
  - πŸ“š **32K Context** (massive text support)
234
  - ⚑ **Instruction-Aware** (optimized for RPG content)
235
+ - πŸ”¬ **Matryoshka Support** (flexible 32-2560 dimensions)
236
+ - πŸ† **Optimal for 1536 dims** (60% dimension retention)
237
+
238
  Perfect for: Product classification, semantic search, recommendations, multilingual matching
239
  """)
240
 
 
253
  )
254
  use_inst = gr.Checkbox(label="Use instruction prefix (recommended for RPG content)", value=True)
255
  output_dims = gr.Slider(
256
+ minimum=32, maximum=2560, value=1536, step=32,
257
+ label="Output Dimensions (Production: 1536)"
258
  )
259
  submit_btn = gr.Button("Generate Embeddings", variant="primary")
260
 
 
265
 
266
  gr.Examples(
267
  examples=[
268
+ ["D&D 5E epic fantasy adventure with dragons and dungeons", True, 1536],
269
+ ["Cyberpunk shadowrun detective noir campaign\nPathfinder 2E beginner box starter set\nCall of Cthulhu horror investigation", True, 1536],
270
  ],
271
  inputs=[input_text, use_inst, output_dims],
272
  )
 
274
  with gr.Tab("πŸ” Similarity Calculator"):
275
  gr.Markdown("""
276
  **Comprehensive Similarity Analysis** - Compare two texts using multiple metrics:
277
+
278
  - **Cosine Similarity**: Angle between vectors (best for semantic meaning)
279
  - **Jaccard Similarity**: Intersection over union (set-like comparison)
280
  - **Sørensen-Dice**: Weighted intersection (emphasizes shared features)
281
  - **Euclidean Distance/Similarity**: Straight-line distance in vector space
282
  - **Manhattan Distance**: Grid-based distance (L1 norm)
283
  - **Pearson Correlation**: Linear relationship between vectors
284
+
285
  Perfect for duplicate detection, classification testing, and understanding product relationships!
286
  """)
287
 
 
317
  with gr.Tab("πŸ“– API Documentation"):
318
  gr.Markdown("""
319
  ## πŸš€ Quick Start
320
+
321
  ### Python
322
+
323
  ```python
324
  import requests
325
  import numpy as np
326
+
327
  url = "https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict"
328
+
329
  # Generate embeddings
330
  texts = [
331
  "Storm King's Thunder - Epic D&D 5E adventure",
332
  "Curse of Strahd - Gothic horror campaign"
333
  ]
334
+
335
  response = requests.post(
336
  url,
337
  json={
338
+ "data": [texts, True, 1536], # [texts, use_instruction, dimensions]
339
  "fn_index": 0 # Index of generate_embeddings function
340
  }
341
  )
342
+
343
  result = response.json()
344
  embeddings = result["data"][0]
345
+
346
  print(f"Generated {len(embeddings)} embeddings")
347
+ print(f"Dimensions: {len(embeddings[0])}") # Should output 1536
348
  ```
349
+
350
  ### TypeScript/JavaScript
351
+
352
  ```typescript
353
  const url = 'https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict';
354
+
355
  const response = await fetch(url, {
356
  method: 'POST',
357
  headers: { 'Content-Type': 'application/json' },
 
359
  data: [
360
  ["Your text here", "Another text"],
361
  true, // use_instruction
362
+ 1536 // output_dimensions (production default)
363
  ],
364
  fn_index: 0
365
  })
366
  });
367
+
368
  const result = await response.json();
369
+ const embeddings = result.data[0]; // Array of 1536-dim vectors
370
  ```
371
+
372
  ### cURL
373
+
374
  ```bash
375
  curl -X POST \\
376
  https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict \\
377
  -H "Content-Type: application/json" \\
378
  -d '{
379
+ "data": [["Your text here"], true, 1536],
380
  "fn_index": 0
381
  }'
382
  ```
383
+
384
  ## πŸ“Š Parameters
385
+
386
  | Parameter | Type | Default | Description |
387
  |-----------|------|---------|-------------|
388
  | `texts` | string[] | required | Array of texts to embed |
389
  | `use_instruction` | boolean | true | Add instruction prefix (improves accuracy) |
390
+ | `output_dimensions` | number | 1536 | Output size (32-2560, production default: 1536) |
391
+
392
  ## 🎯 Use Cases
393
+
394
  - **Product Classification**: Auto-tag by genre, system, theme
395
  - **Semantic Search**: Find by meaning, not keywords
396
  - **Recommendations**: "Similar products"
397
  - **Duplicate Detection**: Find similar listings
398
  - **Multilingual Matching**: Cross-language similarity
399
+
400
  ## ⚑ Performance
401
+
402
  | Batch Size | GPU Throughput | CPU Throughput |
403
  |------------|----------------|----------------|
404
  | 1 | ~800/sec | ~80/sec |
405
  | 32 | ~4000/sec | ~250/sec |
406
+
407
  ## 🌍 Supported Languages
408
+
409
  English, Spanish, French, German, Italian, Portuguese, Russian, Polish, Dutch, Czech,
410
  Chinese, Japanese, Korean, Arabic, Hebrew, Hindi, Thai, Vietnamese, Indonesian,
411
  Turkish, Swedish, Norwegian, Danish, Finnish, Greek, Romanian, Hungarian, and 80+ more!
412
+
413
  ## πŸ“ Citation
414
+
415
  ```bibtex
416
+ @misc{qwen3-embedding-2025,
417
+ title={Qwen3-Embedding: Multilingual Text Embedding Models},
418
+ author={Qwen Team, Alibaba Cloud},
419
  year={2025},
420
  url={https://github.com/QwenLM/Qwen3-Embedding}
421
  }
 
425
  with gr.Tab("ℹ️ Model Info"):
426
  gr.Markdown(f"""
427
  ## Model Details
428
+
429
  - **Model:** {MODEL_NAME}
430
  - **Dimensions:** {model.get_sentence_embedding_dimension()}
431
  - **Max Sequence Length:** {model.max_seq_length} tokens
432
  - **Languages:** 100+
433
  - **License:** Apache 2.0
434
  - **Normalization:** L2 normalized (ready for cosine similarity)
435
+
436
  ## Advantages
437
+
438
+ βœ… **Best Multilingual Performance** - Top tier on MTEB leaderboard
439
  βœ… **Massive Context** - 32K tokens (vs 512 for most models)
440
  βœ… **Instruction-Aware** - Can customize for specific domains
441
+ βœ… **Flexible Dimensions** - 32 to 2560 dimensions (matryoshka truncation)
442
  βœ… **Code-Switching** - Handles mixed-language text
443
+ βœ… **Production Optimized** - 60% retention at 1536 dims (best in class)
444
+
445
  ## Resources
446
+
447
+ - [Model Card](https://huggingface.co/Qwen/Qwen3-Embedding-4B)
448
+ - [Qwen3-Embedding GitHub](https://github.com/QwenLM/Qwen3-Embedding)
449
+ - [Qwen Blog](https://qwenlm.github.io/)
450
  - [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)
451
  """)
452
 
453
  # Launch with API enabled
454
  if __name__ == "__main__":
455
  demo.launch()