# Scraped file-viewer header (not part of the program):
# Muedgar's picture | Create app.py | fcf60f2 | raw | history blame | 2.72 kB
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import gradio as gr
def rainPrediction(fileCSVName):
    """Train a random forest on the weather CSV and return held-out predictions.

    The function reads the dataset, fills missing values with the most
    frequent value of each column, label-encodes the text-valued columns,
    standardises the features, trains a ``RandomForestClassifier`` on 80% of
    the rows and predicts the remaining 20%. The accuracy on the held-out
    rows is printed to stdout.

    Args:
        fileCSVName: Name of the dataset to use. Only ``"weatherAUS.csv"`` is
            supported; the data itself is read from ``/weatherAUS.csv``.

    Returns:
        A ``pandas.DataFrame`` with one row per held-out sample and two
        columns: ``'Rain Tomorrow'`` (the value from the last CSV column) and
        ``'Rain Prediction'`` (the classifier's prediction for that row).

    Raises:
        ValueError: If ``fileCSVName`` is not the supported dataset name.
    """
    supported_name = "weatherAUS.csv"
    if fileCSVName != supported_name:
        # Any other name used to fall through to an unbound ``dataset`` and
        # crash with UnboundLocalError; fail early with a readable message.
        raise ValueError(
            f"Unsupported dataset {fileCSVName!r}; expected {supported_name!r}."
        )

    # NOTE(review): this is an absolute path at the filesystem root - confirm
    # that is where the file lives in the deployment environment.
    dataset = pd.read_csv("/weatherAUS.csv")

    # Features are selected by column position; the target is the last column.
    feature_positions = [1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21]
    X = dataset.iloc[:, feature_positions].values
    # SimpleImputer works on 2-D input, so keep the target as a single column.
    Y = dataset.iloc[:, -1].values.reshape(-1, 1)

    # Replace missing entries with the most frequent value of their column.
    # NOTE(review): the imputers and the scaler below are fitted on all rows
    # before the train/test split, so held-out rows influence preprocessing.
    # Left as-is to keep the returned predictions unchanged.
    X = SimpleImputer(missing_values=np.nan, strategy='most_frequent').fit_transform(X)
    Y = SimpleImputer(missing_values=np.nan, strategy='most_frequent').fit_transform(Y)

    # Encode the non-numerical feature columns (e.g. W, WNW) as integers.
    # Positions refer to columns of X, not of the original CSV.
    for position in (0, 4, 6, 7, -1):
        X[:, position] = LabelEncoder().fit_transform(X[:, position])

    # LabelEncoder expects a 1-D target, so flatten the imputed column first.
    target_encoder = LabelEncoder()
    Y = target_encoder.fit_transform(Y.reshape(-1))

    # Feature scaling to minimise data scattering.
    X = StandardScaler().fit_transform(X)

    # Hold out 20% of the rows for testing; fixed seed for reproducibility.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0
    )

    classifier = RandomForestClassifier(n_estimators=100, random_state=0)
    classifier.fit(X_train, Y_train)

    # Map the integer classes back to the original labels for display.
    predicted = target_encoder.inverse_transform(classifier.predict(X_test))
    actual = target_encoder.inverse_transform(Y_test)

    # Checking the accuracy on the held-out rows.
    print(accuracy_score(actual, predicted))

    # Put actual and predicted labels side by side, one row per test sample.
    side_by_side = np.concatenate(
        (actual.reshape(-1, 1), predicted.reshape(-1, 1)), axis=1
    )
    return pd.DataFrame(side_by_side, columns=['Rain Tomorrow', 'Rain Prediction'])
# Wire the prediction function into a Gradio UI: a free-text input for the
# dataset name and a two-column table for the actual/predicted labels.
app = gr.Interface(
    fn=rainPrediction,
    inputs="text",
    outputs=gr.outputs.Dataframe(
        headers=["Rain Tomorrow", "Rain Prediction"],
        label="All data",
    ),
)
# Start serving the interface with debug mode enabled.
app.launch(debug=True)