import os.path

import torch
import torch.nn as nn
from transformers import RobertaTokenizerFast, RobertaForMaskedLM
import streamlit as st

# Pick the inference device once, at import time, so it is defined before
# load_models() is first called.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


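# A small feed-forward classification head: BatchNorm, two hidden Linear
# layers with the given activation (ReLU by default), and a Linear output
# layer whose logit is squashed through a sigmoid in forward().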
class SimpleClassifier(nn.Module):
    def __init__(self, in_features: int, hidden_features: int,
                 out_features: int, activation=nn.ReLU()):
        super().__init__()
        self.bn = nn.BatchNorm1d(in_features)
        self.in2hid = nn.Linear(in_features, hidden_features)
        self.activation = activation
        self.hid2hid = nn.Linear(hidden_features, hidden_features)
        self.hid2out = nn.Linear(hidden_features, out_features)
        # Defined but never used in forward(); kept so that checkpoints
        # saved with this layer still load without errors.
        self.bn2 = nn.BatchNorm1d(hidden_features)

    def forward(self, X):
        X = self.bn(X)
        X = self.in2hid(X)
        X = self.activation(X)
        X = self.hid2hid(X)
        X = self.activation(X)
        X = self.hid2out(X)
        # Squash the single output logit to a probability in (0, 1).
        X = torch.sigmoid(X)
        return X


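# Load the tokenizer, the RoBERTa backbone, and the trained classifier head
# once and cache them across Streamlit reruns; st.cache_resource is the
# cache intended for unserializable objects such as models.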
@st.cache_resource()
def load_models():
    model = RobertaForMaskedLM.from_pretrained("roberta-base")
    # Replacing the LM head with Identity makes the model emit the encoder's
    # hidden states instead of vocabulary logits.
    model.lm_head = nn.Identity()
    tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
    my_classifier = SimpleClassifier(768, 768, 1)
    # Resolve the weights file relative to this script's directory.
    weights_path = os.path.join(os.path.dirname(__file__), "twitter_model_91_5-.pth")
    my_classifier.load_state_dict(torch.load(weights_path, map_location=device))
    my_classifier.eval()
    return {
        "tokenizer": tokenizer,
        "model": model,
        "classifier": my_classifier,
    }


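# Tokenize the input, pool RoBERTa's token embeddings into a single vector
# by summation, and return the classifier's probability as a Python float.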
def classify_text(text: str) -> float:
    models = load_models()
    tokenizer, model, classifier = models["tokenizer"], models["model"], models["classifier"]

    X = tokenizer(
        text,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )["input_ids"]

    with torch.no_grad():
        # With lm_head replaced by Identity, the "logits" are the encoder's
        # final hidden states, shape (1, seq_len, 768). Summing over tokens
        # pools them into one 768-dim embedding; [None, :] restores the
        # batch dimension expected by the classifier.
        X = model(X).logits[0].sum(dim=0)[None, :]
        # .item() turns the (1, 1) output tensor into the plain float the
        # signature promises.
        return classifier(X).item()


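# A minimal front-end sketch (an assumption, not part of the original
# script; the widget labels are illustrative) showing how classify_text
# could be wired into the Streamlit page:
st.title("Tweet classifier")
user_text = st.text_area("Text to classify")
if st.button("Classify") and user_text:
    score = classify_text(user_text)
    st.write(f"Predicted probability: {score:.3f}")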