cardiffnlp/tweet_eval
Viewer • Updated • 201k • 35.8k • 143
How to use AK776161/birdseye_roberta-base-tweet-eval with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-classification", model="AK776161/birdseye_roberta-base-tweet-eval")
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained("AK776161/birdseye_roberta-base-tweet-eval")
model = AutoModelForSequenceClassification.from_pretrained("AK776161/birdseye_roberta-base-tweet-eval")
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained("AK776161/birdseye_roberta-base-tweet-eval")
model = AutoModelForSequenceClassification.from_pretrained("AK776161/birdseye_roberta-base-tweet-eval")

This is a RoBERTa-base model fine-tuned on 8 datasets with ~20 M tweets. The model is suited to English, though it can also do a fine job on other languages.
Git Repo: SENTIMENTANALYSIS-PROJECT
Demo: BYRD'S I
labels: 0 -> Negative; 1 -> Neutral; 2 -> Positive;
Model Metrics
Accuracy: ~96%
Sparse Categorical Accuracy: 0.9597
Loss: 0.1144
val_loss -- [onLast_train] : 0.1482
Note:
Due to discrepancies in the Neutral data across the datasets, we published another model,
"Byrd's I only positive_negative", to help identify the neutral data, and we use the
"AdaBoot" method (averaging the logits of both models, as in the code below) to get accurate output.
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
from transformers import TFAutoModelForSequenceClassification
import pandas as pd
import numpy as np
import tensorflow
# Model 0: first member of the two-model ensemble. from_tf=True asks
# Transformers to load the weights from a TensorFlow checkpoint.
MODEL_0_ID = "AK776161/birdseye_roberta-base-18"
tokenizer = AutoTokenizer.from_pretrained(MODEL_0_ID, use_fast=True)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_0_ID,
    from_tf=True,
)

# Model 1: second member of the ensemble, loaded the same way.
MODEL_1_ID = "AK776161/birdseye_roberta-base-tweet-eval"
tokenizer1 = AutoTokenizer.from_pretrained(MODEL_1_ID, use_fast=True)
model1 = AutoModelForSequenceClassification.from_pretrained(
    MODEL_1_ID,
    from_tf=True,
)
#-----------------------Adaboot technique---------------------------
def nparraymeancalc(arr1, arr2, *, clamp_index=1, clamp_below=-7):
    """Average two batches of per-class scores row by row.

    Before averaging, every entry of ``arr1`` in column ``clamp_index`` that
    is smaller than ``clamp_below`` is replaced by 0. The replacement is done
    on a private copy, so the caller's ``arr1`` is never modified.

    Args:
        arr1: 2-D array-like of scores, one row per input.
        arr2: 2-D array-like of scores with at least as many rows as
            ``arr1``; rows beyond ``len(arr1)`` are ignored.
        clamp_index: Column of ``arr1`` that is clamped (default 1, which is
            the Neutral label in the model card's label mapping -- confirm
            that both models share that mapping).
        clamp_below: Values in that column strictly below this threshold are
            reset to 0 (default -7).

    Returns:
        A numpy array with one averaged row per row of ``arr1``; an empty
        1-D array when ``arr1`` has no rows.

    Raises:
        IndexError: If ``arr2`` has fewer rows than ``arr1``.
    """
    # np.array copies its input, so the clamping below stays local.
    first = np.array(arr1)
    row_count = len(first)
    if row_count == 0:
        return np.array([])

    second = np.asarray(arr2)
    if len(second) < row_count:
        raise IndexError(
            "arr2 has fewer rows than arr1 "
            f"({len(second)} < {row_count})"
        )

    # Clamp the very low scores in the chosen column of the first batch.
    too_low = first[:, clamp_index] < clamp_below
    first[too_low, clamp_index] = 0

    # Element-wise mean of the two batches, computed in one vectorized call.
    return np.mean(np.stack([first, second[:row_count]]), axis=0)
def predictions(tokenizedtext):
    """Run both classifiers on one tokenized batch and blend their logits.

    Args:
        tokenizedtext: Mapping of keyword arguments that is unpacked into
            both ``model`` and ``model1`` (the file builds it with
            ``tokenizer(..., return_tensors='pt')``).
            NOTE(review): the same encoding is fed to both models, which
            assumes the two checkpoints share a vocabulary -- confirm.

    Returns:
        A numpy array holding the blended scores produced by
        ``nparraymeancalc`` from the two models' logits.
    """
    # Local import: the file's import block does not bring torch into scope.
    import torch

    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        logits1 = model(**tokenizedtext).logits
        logits2 = model1(**tokenizedtext).logits

    # .cpu() makes the numpy conversion work even when the tensors live on
    # an accelerator; it is a no-op for tensors already on the CPU.
    blended = nparraymeancalc(
        logits1.detach().cpu().numpy(),
        logits2.detach().cpu().numpy(),
    )
    return np.asarray(blended)
def labelassign(predictionresult):
    """Pick the highest-scoring class index for every row of scores.

    Args:
        predictionresult: Iterable of score rows (numpy arrays or plain
            sequences), one row per input.

    Returns:
        A list with the argmax index of each row, in input order. Per the
        model card the ids map as 0 -> Negative, 1 -> Neutral, 2 -> Positive.
    """
    # np.argmax accepts plain sequences too, not just objects with .argmax().
    return [np.argmax(scores) for scores in predictionresult]
# Encode the input text as PyTorch tensors, padding and truncating as needed.
tokenizeddata = tokenizer(
    "----YOUR_TEXT---",
    return_tensors='pt',
    padding=True,
    truncation=True,
)

# Blend the two models' logits, then print the predicted label id per input
# (0 -> Negative, 1 -> Neutral, 2 -> Positive).
result = predictions(tokenizeddata)
print(labelassign(result))
Output for "I LOVE YOU":
1) Positive: 0.994
2) Negative: 0.000
3) Neutral: 0.006
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-classification", model="AK776161/birdseye_roberta-base-tweet-eval")