Spaces:

yhn112
/

article_classifier

Sleeping

App Files Files Community

yhn112 committed on Apr 17, 2023

Commit

9a72fe1

1 Parent(s): b5e8297

Add application file

Browse files

Files changed (3) hide show

app.py +78 -0
model.pt +3 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import torch
+import pandas as pd
+import streamlit as st
+import torch.nn as nn
+from transformers import RobertaTokenizer, RobertaModel, PretrainedConfig
+@st.cache_resource
+def init_model():
+    model = RobertaModel(config=PretrainedConfig().from_pretrained("roberta-large-mnli"))
+    model.pooler = nn.Sequential(
+        nn.Linear(1024, 256),
+        nn.LayerNorm(256),
+        nn.ReLU(),
+        nn.Linear(256, 8),
+        nn.Sigmoid()
+    )
+    model_path = "model.pt"
+    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
+    model.eval()
+    return model
+cats = ["Computer Science", "Economics", "Electrical Engineering",
+        "Mathematics", "Physics", "Biology", "Finance", "Statistics"]
+def predict(outputs):
+    top = 0
+    temp = 100000
+    apr_probs = torch.nn.functional.softmax(torch.tensor([39253., 84., 220., 2263., 1214., 909., 66., 10661.]) / temp, dim=0)
+    probs = nn.functional.softmax(outputs / apr_probs, dim=1).tolist()[0]
+    top_cats = []
+    top_probs = []
+    first = True
+    write_cs = False
+    for prob, cat in sorted(zip(probs, cats), reverse=True):
+        if first:
+            if cat == "Computer Science":
+                write_cs = True
+            first = False
+        if top < 95:
+            percent = prob * 100
+            top += percent
+            top_cats.append(cat)
+            top_probs.append(str(round(percent, 1)))
+    res = pd.DataFrame(top_probs, index=top_cats, columns=['Percent'])
+    st.write(res)
+    if write_cs:
+        st.write("Today everything is connected with Computer Science")
+tokenizer = RobertaTokenizer.from_pretrained("roberta-large-mnli")
+model = init_model()
+st.title("Article classifier")
+st.markdown("### Title")
+title = st.text_input("*Enter title (required)")
+st.markdown("### Abstract")
+abstract = st.text_area(" Enter abstract", height=200)
+if not title:
+    st.warning("Please fill in required fields")
+else:
+    try:
+        st.markdown("### Result")
+        encoded_input = tokenizer(title + ". " + abstract, return_tensors="pt", padding=True,
+                              max_length=1024, truncation=True)
+        with torch.no_grad():
+            outputs = model(**encoded_input).pooler_output[:, 0, :]
+            predict(outputs)
+    except Exception:
+        st.error("Something went wrong. Try different text")

model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f9ce2a83d4d7f59e53ab917fb99ecaeb26f66a14c9f336b898f4924935af2140
+size 1418460457

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+altair==4.0
+pandas
+torch
+tokenizers
+transformers