import torch
import torch.nn as nn
import pandas as pd
import streamlit as st
from transformers import RobertaTokenizer, RobertaModel, RobertaConfig
def init_model():
    # Build the roberta-large-mnli backbone from its config only;
    # the fine-tuned weights are loaded from model.pt below.
    model = RobertaModel(config=RobertaConfig.from_pretrained("roberta-large-mnli"))
    # Replace the pooler with an 8-way classification head. Applied to the full
    # hidden-state tensor, it maps [batch, seq_len, 1024] -> [batch, seq_len, 8].
    model.pooler = nn.Sequential(
        nn.Linear(1024, 256),
        nn.LayerNorm(256),
        nn.ReLU(),
        nn.Linear(256, 8),
        nn.Sigmoid(),
    )
    model_path = "model.pt"
    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
    model.eval()
    return model
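
# Quick shape check (a sketch; assumes model.pt is available in the working
# directory). The backbone emits hidden states of shape [batch, seq_len, 1024],
# and the replacement pooler maps them position-wise to [batch, seq_len, 8]:
#
#     m = init_model()
#     tok = RobertaTokenizer.from_pretrained("roberta-large-mnli")
#     enc = tok("Attention is all you need.", return_tensors="pt")
#     with torch.no_grad():
#         print(m(**enc).pooler_output.shape)  # -> torch.Size([1, <seq_len>, 8])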
cats = ["Computer Science", "Economics", "Electrical Engineering",
        "Mathematics", "Physics", "Biology", "Finance", "Statistics"]
def predict(outputs):
    top = 0
    # Smoothed prior over categories from per-category training-set counts;
    # with temp this large the softmax is close to uniform, leaving only a
    # mild penalty on frequent classes when the scores are divided by it.
    temp = 100000
    apr_probs = nn.functional.softmax(
        torch.tensor([39253., 84., 220., 2263., 1214., 909., 66., 10661.]) / temp, dim=0)
    probs = nn.functional.softmax(outputs / apr_probs, dim=1).tolist()[0]
    top_cats = []
    top_probs = []
    first = True
    write_cs = False
    for prob, cat in sorted(zip(probs, cats), reverse=True):
        # Remember whether Computer Science came out on top
        if first:
            if cat == "Computer Science":
                write_cs = True
            first = False
        # Keep adding categories until they cover 95% of the probability mass
        if top < 95:
            percent = prob * 100
            top += percent
            top_cats.append(cat)
            top_probs.append(str(round(percent, 1)))
    res = pd.DataFrame(top_probs, index=top_cats, columns=["Percent"])
    st.write(res)
    if write_cs:
        st.write("Today everything is connected with Computer Science")
tokenizer = RobertaTokenizer.from_pretrained("roberta-large-mnli")
model = init_model()

st.title("Article classifier")
st.markdown("### Title")
title = st.text_input("Enter title (required)")
st.markdown("### Abstract")
abstract = st.text_area("Enter abstract", height=200)
if not title:
    st.warning("Please fill in the required title field")
else:
    try:
        st.markdown("### Result")
        # roberta-large handles at most 512 positions, so truncate to that
        encoded_input = tokenizer(title + ". " + abstract, return_tensors="pt",
                                  padding=True, max_length=512, truncation=True)
        with torch.no_grad():
            # The custom pooler returns [batch, seq_len, 8]; keep the <s> (CLS) slot
            outputs = model(**encoded_input).pooler_output[:, 0, :]
        predict(outputs)
    except Exception:
        st.error("Something went wrong. Try different text")
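
# To run locally (assuming this file is saved as app.py with model.pt beside it):
#     pip install torch transformers streamlit pandas
#     streamlit run app.py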