Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| import streamlit as st | |
| import torch | |
| # from transformers import AutoTokenizer, AutoModel | |
| # from sentence_transformers import util | |
| class SentenceSimiliarity(): | |
| def __init__(self, model_name, sentence1, sentence2): | |
| self.KEY = os.getenv("HF_KEY") | |
| self.headers = {"Authorization": f"Bearer {self.KEY}"} | |
| self.sentence1 = sentence1 | |
| self.sentence2 = sentence2 | |
| self.api_url = f"https://api-inference.huggingface.co/models/{model_name}" | |
| # self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) | |
| # self.model = AutoModel.from_pretrained(self.model_name) | |
| def model_selection(self): | |
| available_models = [ | |
| # "distilbert-base-uncased", | |
| # "bert-base-uncased", | |
| "sentence-transformers/all-MiniLM-L6-v2", | |
| "sentence-transformers/all-mpnet-base-v2", | |
| "sentence-transformers/distiluse-base-multilingual-cased-v2", | |
| "intfloat/e5-small", | |
| "intfloat/e5-base", | |
| "intfloat/e5-large-v2", | |
| "intfloat/multilingual-e5-base", | |
| # "togethercomputer/m2-bert-80M-32k-retrieval", | |
| # "togethercomputer/m2-bert-80M-8k-retrieval", | |
| # "togethercomputer/m2-bert-80M-2k-retrieval", | |
| ] | |
| self.model_name = st.sidebar.selectbox( | |
| label="Select Your Models", | |
| options=available_models, | |
| ) | |
| # def tokenize(self): | |
| # tokenized1 = self.tokenizer( | |
| # self.sentence1, | |
| # return_tensors='pt', | |
| # padding=True, | |
| # truncation=True | |
| # ) | |
| # tokenized2 = self.tokenizer( | |
| # self.sentence2, | |
| # return_tensors='pt', | |
| # padding=True, | |
| # truncation=True | |
| # ) | |
| # return tokenized1, tokenized2 | |
| # def get_embeddings(self): | |
| # # tokenized1, tokenized2 = self.tokenize() | |
| # with torch.no_grad(): | |
| # embeddings1 = self.model(**tokenized1).last_hidden_state.mean(dim=1) | |
| # embeddings2 = self.model(**tokenized2).last_hidden_state.mean(dim=1) | |
| # return embeddings1, embeddings2 | |
| # def get_similarity_scores(self): | |
| # embeddings1, embeddings2 = self.get_embeddings() | |
| # scores = util.cos_sim(embeddings1, embeddings2) | |
| # return scores | |
| def query(self, payload): | |
| response = requests.post(self.api_url, headers=self.headers, json=payload) | |
| return response.json() | |
| def results(self): | |
| scores = self.query({ | |
| "inputs": { | |
| "source_sentence": self.sentence1, | |
| "sentences": [ | |
| self.sentence2, | |
| ] | |
| }, | |
| }) | |
| # scores = self.get_similarity_scores() | |
| statement = f"The sentence has {scores[0] * 100:.2f}% similarity" | |
| # statement = scores | |
| return statement | |
| class UI(): | |
| def __init__(self): | |
| st.title("Sentence Similiarity Checker") | |
| st.caption("You can use this for checking similarity between resume and job description") | |
| def get(self): | |
| available_models = [ | |
| # "distilbert-base-uncased", | |
| # "bert-base-uncased", | |
| "sentence-transformers/all-MiniLM-L6-v2", | |
| "sentence-transformers/all-mpnet-base-v2", | |
| "sentence-transformers/distiluse-base-multilingual-cased-v2", | |
| "intfloat/e5-small", | |
| "intfloat/e5-base", | |
| "intfloat/e5-large-v2", | |
| "intfloat/multilingual-e5-base", | |
| # "togethercomputer/m2-bert-80M-32k-retrieval", | |
| # "togethercomputer/m2-bert-80M-8k-retrieval", | |
| # "togethercomputer/m2-bert-80M-2k-retrieval", | |
| ] | |
| self.model_name = st.sidebar.selectbox( | |
| label="Select Your Models", | |
| options=available_models, | |
| ) | |
| self.sentence1 = st.text_area( | |
| label="Sentence 1", | |
| help="This is a parent text the next text will be compared with this text" | |
| ) | |
| self.sentence2 = st.text_area( | |
| label="Sentence 2", | |
| help="This is a child text" | |
| ) | |
| self.button = st.button( | |
| label="Check", | |
| help='Check Sentence Similarity' | |
| ) | |
| def result(self): | |
| self.get() | |
| ss = SentenceSimiliarity(self.model_name, self.sentence1, self.sentence2) | |
| if self.button: | |
| st.text(ss.results()) | |
| # print(ss.results()) | |
| ui = UI() | |
| ui.result() | |