diff --git a/app.py b/app.py
index 7aebde3acd003e8affdbc95fd26e2307ba743c7f..aa51c0b8859484a0e370c67dea3be8267fc027aa 100644
--- a/app.py
+++ b/app.py
@@ -1,6 +1,7 @@
 import numpy as np
-from flask import Flask, request, jsonify, render_template
+from flask import Flask, request, render_template
 import pickle
+import re
 import json
 
 app = Flask(__name__) #Initialize the flask App
@@ -9,13 +10,67 @@ model = pickle.load(open('model.pickle', 'rb'))
 mlb = pickle.load(open('mlb.pickle','rb'))
 vectorizer = pickle.load(open('vectorizer.pickle','rb'))
 
-@app.route('/')
-def home():
-    return render_template('index.ejs')
+replace = [':', ';', '<', '=', '>', '?', '@', '\\', '_', '`',
+           '\n', '\r', '#', '$', '%', '&', "'", '*', '+', '-', '{', '|', '}', 
+           '\xa0', '¢', '£', '¥', '«', '°', '´', '»', '¼', '½', '×', 'ß', 'à', 'á', 'â', 
+           'ã', 'ä', 'å', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ð', 'ñ', 
+           'ò', 'ó', 'ô', 'ö', '÷', 'ø', 'ù', 'ú', 'û', 'ü', 'ā', 'ă', 'ć', 'č', 'đ', 
+           'ē', 'ė', 'ě', 'ğ', 'ġ', 'ħ', 'ĩ', 'ī', 'ı', 'ĺ', 'ł', 'ń', 'ō', 'œ', 'ś', 
+           'ş', 'š', 'ţ', 'ũ', 'ū', 'ŭ', 'ź', 'ż', 'ž', 'ơ', 'ư', 'ǔ', 'ș', 'ț', 'ɐ', 
+           'ɔ', 'ə', 'ɡ', 'ɪ', 'ɾ', 'ʃ', 'ʊ', 'ʲ', 'ʻ', 'ʼ', 'ˈ', 'ː', '̇', 'μ', 'π', 
+           'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п', 
+           'р', 'с', 'т', 'у', 'х', 'ч', 'ш', 'ы', 'ь', 'я', 'ё', 'ђ', 'ј', 'њ', 'ה', 
+           'ו', 'י', 'ך', 'ל', 'מ', 'ש', 'ं', 'अ', 'आ', 'उ', 'क', 'ग', 'च', 'ज', 'ट', 
+           'ठ', 'ण', 'त', 'द', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'ष', 
+           'स', 'ह', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'े', 'ै', 'ो', '्', 'ক', 'ঠ', 'ত', 'থ', 
+           'দ', 'ধ', 'ন', 'ব', 'র', 'শ', 'ষ', 'স', 'া', 'ি', 'ী', 'ু', 'ে', '্', 'ế', 'ễ', 
+           'ệ', 'ộ', '\u2009', '\u200a', '\u200b', '\u200c', '\u200d', '‐', '–', '—', '―', 
+           '‘', '’', '‚', '“', '”', '…', '′', '″', '⁄', '₤', '€', '₱', '₹', '⅓', '⅞', '←', 
+           '−', '♥', '\u3000', 'あ', 'い', 'う', 'お', 'か', 'が', 'き', 'く', 'け', 'げ', 'こ', 
+           'し', 'す', 'せ', 'に', 'の', 'ほ', 'ま', 'み', 'も', 'や', 'ゅ', 'よ', 'り', 'ん', 
+           'ア', 'イ', 'カ', 'ガ', 'キ', 'ギ', 'ク', 'ケ', 'ゴ', 'サ', 'ザ', 'シ', 'ジ', 'ス', 
+           'ズ', 'ソ', 'ゾ', 'タ', 'ダ', 'ッ', 'ツ', 'デ', 'ト', 'ド', 'ニ', 'ヌ', 'ネ', 'ノ', 
+           'バ', 'パ', 'フ', 'ブ', 'プ', 'ペ', 'ボ', 'ム', 'メ', 'ャ', 'ヤ', 'ュ', 'ラ', 'リ', 
+           'ル', 'ロ', 'ワ', 'ン', 'ヶ', '・', 'ー', 'ㄜ', '一', '七', '三', '世', '中', '丸', 
+           '丹', '举', '久', '之', '也', '予', '二', '井', '京', '人', '仁', '介', '伽', '俊', 
+           '信', '元', '兄', '先', '光', '党', '公', '六', '冯', '刀', '刃', '刘', '劍', '力', 
+           '勇', '務', '化', '十', '千', '原', '友', '司', '合', '名', '向', '君', '吳', '命', 
+           '咲', '四', '団', '図', '国', '圭', '城', '域', '塔', '士', '外', '多', '夢', '大', 
+           '天', '夫', '奪', '女', '姐', '婆', '婉', '子', '孙', '学', '孫', '宇', '安', '宗', 
+           '宙', '宮', '家', '小', '尚', '尾', '山', '島', '州', '巫', '布', '师', '師', '府', 
+           '庭', '弁', '式', '张', '德', '怪', '恵', '悟', '悪', '感', '我', '擊', '教', '方', 
+           '旅', '日', '春', '書', '月', '朋', '望', '木', '本', '村', '条', '杭', '杰', '東', 
+           '林', '柊', '柑', '桑', '森', '椰', '業', '樵', '樹', '正', '武', '歩', '死', '氏', 
+           '民', '永', '汤', '沙', '沢', '治', '法', '泰', '津', '海', '清', '渡', '湖', '源', 
+           '潛', '澎', '澤', '濱', '灣', '点', '焉', '無', '爱', '爷', '物', '犯', '狐', '狙', 
+           '狸', '王', '珊', '琪', '瑜', '瑞', '生', '田', '由', '甲', '町', '畑', '的', '真', 
+           '神', '祭', '穂', '空', '筒', '紀', '綿', '織', '美', '老', '者', '聖', '興', '良', 
+           '花', '芳', '草', '菜', '萩', '葵', '蓝', '薈', '藍', '號', '蛇', '行', '衛', '裕', 
+           '襄', '西', '語', '談', '諜', '識', '讃', '识', '谷', '貴', '賢', '贝', '超', '足', 
+           '轩', '逆', '遊', '達', '邦', '郎', '部', '里', '野', '金', '鈴', '鉄', '鎮', '間', 
+           '防', '陆', '陸', '陽', '隊', '雄', '雪', '青', '響', '首', '香', '馮', '駿', '马', 
+           '髪', '魔', '魚', '鹅', '麻', '黃', '黄', '龍', '龙', '북', '촌', 'fl', '\ufeff',]
+
+def preProcess(text):
+    """Clean request text like build.py cleans plots; None is treated as ''."""
+    text = (text or "").lower()
+    # Specific rules ("what's", "can't") must precede the generic "'s"/"n't".
+    for pattern, repl in ((r"what's", "what is "), (r"\'s", " "),
+                          (r"\'ve", " have "), (r"can't", "can not "),
+                          (r"n't", " not "), (r"i'm", "i am "),
+                          (r"\'re", " are "), (r"\'d", " would "),
+                          (r"\'ll", " will "), (r"\'scuse", " excuse ")):
+        text = re.sub(pattern, repl, text)
+    text = text.strip(' ')
+    # NOTE(review): `replace` entries are removed as substrings; if its 'fl'
+    # entry is two ASCII letters rather than a ligature it eats "fl" in words.
+    for character in replace:
+        text = text.replace(character, "")
+    return text
 
 @app.route('/predict',methods=['POST'])
 def predict():
-    reqInput = request.form.get('text')
+    reqInput = preProcess(request.form.get('text'))
     vector = vectorizer.transform([reqInput])
     prediction = model.predict(vector)
     output = json.dumps(mlb.inverse_transform(prediction)[0])
diff --git a/build.py b/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f57d9f2410a0b17d7782d47bd07932671324a55
--- /dev/null
+++ b/build.py
@@ -0,0 +1,470 @@
+#Imports
+import re
+from random import seed
+import pandas as pd
+import numpy as np
+import pickle
+import os
+
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.preprocessing import MultiLabelBinarizer
+from sklearn.utils import resample
+from sklearn.multiclass import OneVsRestClassifier
+from numpy import array
+
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.model_selection import train_test_split
+from datetime import datetime
+
+#Functions for splitting into train/test sets.
+def split_dataset(df, test_size=0.25):
+    """Return a shuffled ``(train, test)`` pair; ``test_size`` goes to train_test_split."""
+    return tuple(train_test_split(df, test_size=test_size, shuffle=True))
+
+def get_train_test(df, test_size=0.25):
+    """Split ``df`` and encode both parts with the module-level ``mlb``/``vectorizer``.
+
+    ``vectorizer`` is re-fitted on the training plots only; ``mlb`` is only
+    asked to ``transform`` here, so it has to be fitted before this is called.
+    """
+    movies_train, movies_test = split_dataset(df, test_size)
+    y_train, y_test = (mlb.transform(part["GenreSplitMain"].tolist())
+                       for part in (movies_train, movies_test))
+    x_train = vectorizer.fit(movies_train.Plot).transform(movies_train.Plot)
+    x_test = vectorizer.transform(movies_test.Plot)
+    return movies_train, movies_test, x_train, y_train, x_test, y_test
+
+mlb = MultiLabelBinarizer()
+vectorizer = CountVectorizer()
+
+
+#Fetching the original dataset.
+# url = "https://github.com/Jamchello/WikiMoviePlots/blob/master/wiki_movie_plots_deduped.csv?raw=true"
+movies = pd.read_csv('./data/Data.csv', delimiter=",")
+# Threshold used for the counts on the raw Genre column below.
+MIN_MOVIES = 500
+movies['Count'] = 1
+# Per-genre movie counts computed on the raw (uncorrected) Genre strings.
+genre_count = movies.Genre.value_counts()
+filtered_genre_count = genre_count[genre_count >= MIN_MOVIES].values # Counts only (.values drops the labels) of genres with at least MIN_MOVIES movies
+genres_array = movies.Genre
+genres = pd.DataFrame({'Genre':genres_array})
+genres['Count'] = 1
+genres[['Genre', 'Count']].groupby(['Genre'], as_index=False).sum().sort_values(['Count'], ascending=False)  # result discarded; the next line recomputes and keeps it
+genres = genres[['Genre', 'Count']].groupby(['Genre'], as_index=False).sum().sort_values(['Count'], ascending=False)
+top_genres = np.array(genres[genres['Count'] >= MIN_MOVIES]['Genre'])
+
+movies['GenreCorrected'] =movies['Genre'] 
+movies['GenreCorrected']=movies['GenreCorrected'].str.strip()
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' - ', '|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' / ', '|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('/', '|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' & ', '|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(', ', '|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('; ', '|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('bio-pic', 'biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biopic', 'biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biographical', 'biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biodrama', 'biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('bio-drama', 'biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biographic', 'biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \(film genre\)', '')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('animated','animation')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('anime','animation')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('children\'s','children')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('comedey','comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\[not in citation given\]','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' set 4,000 years ago in the canadian arctic','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('historical','history')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('romantic','romance')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('3-d','animation')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('3d','animation')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('viacom 18 motion pictures','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('sci-fi','science_fiction')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('ttriller','thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('.','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('based on radio serial','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' on the early years of hitler','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('sci fi','science_fiction')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('science fiction','science_fiction')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' (30min)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('16 mm film','short')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\[140\]','drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\[144\]','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' for ','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('adventures','adventure')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('kung fu','martial_arts')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('kung-fu','martial_arts')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('martial arts','martial_arts')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('world war ii','war')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('world war i','war')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biography about montreal canadiens star|maurice richard','biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('bholenath movies|cinekorn entertainment','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \(volleyball\)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('spy film','spy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('anthology film','anthology')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biography fim','biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('avant-garde','avant_garde')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biker film','biker')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('buddy cop','buddy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('buddy film','buddy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('comedy 2-reeler','comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('films','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('film','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biography of pioneering american photographer eadweard muybridge','biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('british-german co-production','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('bruceploitation','martial_arts')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('comedy-drama adaptation of the mordecai richler novel','comedy-drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('movies by the mob\|knkspl','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('movies','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('movie','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('coming of age','coming_of_age')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('coming-of-age','coming_of_age')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('drama about child soldiers','drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('(( based).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('(( co-produced).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('(( adapted).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('(( about).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('musical b','musical')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('animationchildren','animation|children')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' period','period')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('drama loosely','drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \(aquatics|swimming\)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \(aquatics|swimming\)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace("yogesh dattatraya gosavi's directorial debut \[9\]",'')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace("war-time","war")
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace("wartime","war")
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace("ww1","war")
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('unknown','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace("wwii","war")
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('psychological','psycho')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('rom-coms','romance')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('true crime','crime')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|007','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('slice of life','slice_of_life')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('computer animation','animation')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('gun fu','martial_arts')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('j-horror','horror')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \(shogi|chess\)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('afghan war drama','war drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|6 separate stories','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \(30min\)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' (road bicycle racing)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' v-cinema','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('tv miniseries','tv_miniseries')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|docudrama','\|documentary|drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' in animation','|animation')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('((adaptation).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('((adaptated).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('((adapted).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('(( on ).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('american football','sports')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('dev\|nusrat jahan','sports')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('television miniseries','tv_miniseries')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \(artistic\)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \|direct-to-dvd','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('history dram','history drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('martial art','martial_arts')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('psycho thriller,','psycho thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|1 girl\|3 suitors','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' \(road bicycle racing\)','')
+filterE = movies['GenreCorrected']=="ero"
+movies.loc[filterE,'GenreCorrected']="adult"
+filterE = movies['GenreCorrected']=="music"
+movies.loc[filterE,'GenreCorrected']="musical"
+filterE = movies['GenreCorrected']=="-"
+movies.loc[filterE,'GenreCorrected']=''
+filterE = movies['GenreCorrected']=="comedy–drama"
+movies.loc[filterE,'GenreCorrected'] = "comedy|drama"
+filterE = movies['GenreCorrected']=="comedy–horror"
+movies.loc[filterE,'GenreCorrected'] = "comedy|horror"
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(' ','|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace(',','|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('-','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('actionadventure','action|adventure')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('actioncomedy','action|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('actiondrama','action|drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('actionlove','action|love')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('actionmasala','action|masala')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('actionchildren','action|children')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('fantasychildren\|','fantasy|children')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('fantasycomedy','fantasy|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('fantasyperiod','fantasy|period')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('cbctv_miniseries','tv_miniseries')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('dramacomedy','drama|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('dramacomedysocial','drama|comedy|social')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('dramathriller','drama|thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('comedydrama','comedy|drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('dramathriller','drama|thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('comedyhorror','comedy|horror')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('sciencefiction','science_fiction')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('adventurecomedy','adventure|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('animationdrama','animation|drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|\|','|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('muslim','religious')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('thriler','thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('crimethriller','crime|thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('fantay','fantasy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('actionthriller','action|thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('comedysocial','comedy|social')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('martialarts','martial_arts')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|\(children\|poker\|karuta\)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('epichistory','epic|history')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('erotica','adult')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('erotic','adult')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('((\|produced\|).+)','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('chanbara','chambara')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('comedythriller','comedy|thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biblical','religious')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biblical','religious')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('colour\|yellow\|productions\|eros\|international','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|directtodvd','')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('liveaction','live|action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('melodrama','drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('superheroes','superheroe')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('gangsterthriller','gangster|thriller')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('heistcomedy','comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('heist','action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('historic','history')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('historydisaster','history|disaster')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('warcomedy','war|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('westerncomedy','western|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('ancientcostume','costume')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('computeranimation','animation')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('dramatic','drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('familya','family')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('familya','family')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('dramedy','drama|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('dramaa','drama')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('famil\|','family')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('superheroe','superhero')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('biogtaphy','biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('devotionalbiography','devotional|biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('docufiction','documentary|fiction')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('familydrama','family|drama')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('espionage','spy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('supeheroes','superhero')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('romancefiction','romance|fiction')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('horrorthriller','horror|thriller')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('suspensethriller','suspense|thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('musicaliography','musical|biography')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('triller','thriller')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|\(fiction\)','|fiction')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('romanceaction','romance|action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('romancecomedy','romance|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('romancehorror','romance|horror')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('romcom','romance|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('rom\|com','romance|comedy')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('satirical','satire')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('science_fictionchildren','science_fiction|children')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('homosexual','adult')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('sexual','adult')
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('mockumentary','documentary')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('periodic','period')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('romanctic','romantic')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('politics','political')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('samurai','martial_arts')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('tv_miniseries','series')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('serial','series')
+
+filterE = movies['GenreCorrected']=="musical–comedy"
+movies.loc[filterE,'GenreCorrected'] = "musical|comedy"
+
+filterE = movies['GenreCorrected']=="roman|porno"
+movies.loc[filterE,'GenreCorrected'] = "adult"
+
+
+filterE = movies['GenreCorrected']=="action—masala"
+movies.loc[filterE,'GenreCorrected'] = "action|masala"
+
+
+filterE = movies['GenreCorrected']=="horror–thriller"
+movies.loc[filterE,'GenreCorrected'] = "horror|thriller"
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('family','children')  # from here on, fine-grained labels are folded into broader ones
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('martial_arts','action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('horror','thriller')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('war','action')  # substring match: also rewrites any longer label containing "war"
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('adventure','action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('science_fiction','action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('western','action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('western','action')  # same replacement as the line above
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('noir','black')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('spy','action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('superhero','action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('social','')  # 'social' is removed rather than remapped
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('suspense','action')
+
+
+filterE = movies['GenreCorrected']=="drama|romance|adult|children"
+movies.loc[filterE,'GenreCorrected'] = "drama|romance|adult"
+
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('\|–\|','|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.strip(to_strip='\|')
+movies['GenreCorrected']=movies['GenreCorrected'].str.replace('actionner','action')
+movies['GenreCorrected']=movies['GenreCorrected'].str.strip()
+
+moviesGenre = movies[['GenreCorrected','Count']].groupby(['GenreCorrected']).count()
+
+movies[['GenreCorrected','Count']].groupby(['GenreCorrected'], as_index=False).count().shape[0]
+
+movies['GenreSplit']=movies['GenreCorrected'].str.split('|')
+movies['GenreSplit']= movies['GenreSplit'].apply(np.sort).apply(np.unique)
+movies[['GenreCorrected', 'GenreSplit']][100:120]
+
+# Flatten every movie's genre array into one array of labels. The previous
+# index loop ran to shape[0]-1 (dropping the last movie), looked rows up by
+# label, and re-allocated the array on every step; the empty seed keeps an empty frame valid.
+genres_array = np.concatenate([np.array([], dtype=str), *movies['GenreSplit']])
+
+genres = pd.DataFrame({'Genre':genres_array})
+genres['Count'] = 1
+genres[['Genre', 'Count']].groupby(['Genre'], as_index=False).sum().sort_values(['Count'], ascending=False)  # result discarded; the next line recomputes and keeps it
+genres = genres[['Genre', 'Count']].groupby(['Genre'], as_index=False).sum().sort_values(['Count'], ascending=False)
+genres = genres[genres['Genre'] != '']  # Drop the empty label (e.g. 'unknown' was replaced by '' during correction)
+print(genres.head(25))
+
+genres_count = np.array(genres[genres.Count >= MIN_MOVIES].Count)
+genres_list = np.array(genres[genres.Count >= MIN_MOVIES].Genre)
+# The two arrays above were computed with the previous MIN_MOVIES value.
+MIN_MOVIES = 2000  # Minimum number of movies per genre
+# np.isin replaces np.in1d, which NumPy 2.0 deprecates; same mask for 1-D x.
+main_genres = np.array(genres[genres.Count >= MIN_MOVIES].Genre)  # List of genres that will be used for classification
+movies['GenreSplitMain'] = movies['GenreSplit'].apply(lambda x: x[np.isin(x, main_genres)])
+
+main_genres_count = np.array(genres[genres.Count >= MIN_MOVIES].Count)
+# .copy() makes movies_df an independent frame, so assigning to its Plot column later does not target a slice of `movies`.
+movies['GenreCount'] = movies['GenreSplitMain'].apply(len)
+movies_df = movies[movies['GenreCount'] != 0][["Plot", "GenreSplitMain"]].copy()
+# Labels are joined with '-' and split again on '-' to build one indicator column per genre.
+encoded_genres = movies_df.GenreSplitMain.apply(lambda x: '-'.join(x)).str.get_dummies(sep='-')
+
+
+def clean_text(text):
+    """Lower-case ``text``, expand English contractions and trim spaces."""
+    # Applied in order: "what's"/"can't" must precede the generic "'s"/"n't".
+    # NOTE: "'scuse" can never match, because "'s" has already been rewritten.
+    contractions = (
+        (r"what's", "what is "), (r"\'s", " "), (r"\'ve", " have "),
+        (r"can't", "can not "), (r"n't", " not "), (r"i'm", "i am "),
+        (r"\'re", " are "), (r"\'d", " would "), (r"\'ll", " will "),
+        (r"\'scuse", " excuse "),
+    )
+    cleaned = text.lower()
+    for pattern, expansion in contractions:
+        cleaned = re.sub(pattern, expansion, cleaned)
+    return cleaned.strip(' ')
+# Characters deleted from every plot after clean_text: punctuation/symbols, accented Latin letters and non-Latin script characters.
+replace = [':', ';', '<', '=', '>', '?', '@', '\\', '_', '`',
+           '\n', '\r', '#', '$', '%', '&', "'", '*', '+', '-', '{', '|', '}', 
+           '\xa0', '¢', '£', '¥', '«', '°', '´', '»', '¼', '½', '×', 'ß', 'à', 'á', 'â', 
+           'ã', 'ä', 'å', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ð', 'ñ', 
+           'ò', 'ó', 'ô', 'ö', '÷', 'ø', 'ù', 'ú', 'û', 'ü', 'ā', 'ă', 'ć', 'č', 'đ', 
+           'ē', 'ė', 'ě', 'ğ', 'ġ', 'ħ', 'ĩ', 'ī', 'ı', 'ĺ', 'ł', 'ń', 'ō', 'œ', 'ś', 
+           'ş', 'š', 'ţ', 'ũ', 'ū', 'ŭ', 'ź', 'ż', 'ž', 'ơ', 'ư', 'ǔ', 'ș', 'ț', 'ɐ', 
+           'ɔ', 'ə', 'ɡ', 'ɪ', 'ɾ', 'ʃ', 'ʊ', 'ʲ', 'ʻ', 'ʼ', 'ˈ', 'ː', '̇', 'μ', 'π', 
+           'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п', 
+           'р', 'с', 'т', 'у', 'х', 'ч', 'ш', 'ы', 'ь', 'я', 'ё', 'ђ', 'ј', 'њ', 'ה', 
+           'ו', 'י', 'ך', 'ל', 'מ', 'ש', 'ं', 'अ', 'आ', 'उ', 'क', 'ग', 'च', 'ज', 'ट', 
+           'ठ', 'ण', 'त', 'द', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'ष', 
+           'स', 'ह', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'े', 'ै', 'ो', '्', 'ক', 'ঠ', 'ত', 'থ', 
+           'দ', 'ধ', 'ন', 'ব', 'র', 'শ', 'ষ', 'স', 'া', 'ি', 'ী', 'ু', 'ে', '্', 'ế', 'ễ', 
+           'ệ', 'ộ', '\u2009', '\u200a', '\u200b', '\u200c', '\u200d', '‐', '–', '—', '―', 
+           '‘', '’', '‚', '“', '”', '…', '′', '″', '⁄', '₤', '€', '₱', '₹', '⅓', '⅞', '←', 
+           '−', '♥', '\u3000', 'あ', 'い', 'う', 'お', 'か', 'が', 'き', 'く', 'け', 'げ', 'こ', 
+           'し', 'す', 'せ', 'に', 'の', 'ほ', 'ま', 'み', 'も', 'や', 'ゅ', 'よ', 'り', 'ん', 
+           'ア', 'イ', 'カ', 'ガ', 'キ', 'ギ', 'ク', 'ケ', 'ゴ', 'サ', 'ザ', 'シ', 'ジ', 'ス', 
+           'ズ', 'ソ', 'ゾ', 'タ', 'ダ', 'ッ', 'ツ', 'デ', 'ト', 'ド', 'ニ', 'ヌ', 'ネ', 'ノ', 
+           'バ', 'パ', 'フ', 'ブ', 'プ', 'ペ', 'ボ', 'ム', 'メ', 'ャ', 'ヤ', 'ュ', 'ラ', 'リ', 
+           'ル', 'ロ', 'ワ', 'ン', 'ヶ', '・', 'ー', 'ㄜ', '一', '七', '三', '世', '中', '丸', 
+           '丹', '举', '久', '之', '也', '予', '二', '井', '京', '人', '仁', '介', '伽', '俊', 
+           '信', '元', '兄', '先', '光', '党', '公', '六', '冯', '刀', '刃', '刘', '劍', '力', 
+           '勇', '務', '化', '十', '千', '原', '友', '司', '合', '名', '向', '君', '吳', '命', 
+           '咲', '四', '団', '図', '国', '圭', '城', '域', '塔', '士', '外', '多', '夢', '大', 
+           '天', '夫', '奪', '女', '姐', '婆', '婉', '子', '孙', '学', '孫', '宇', '安', '宗', 
+           '宙', '宮', '家', '小', '尚', '尾', '山', '島', '州', '巫', '布', '师', '師', '府', 
+           '庭', '弁', '式', '张', '德', '怪', '恵', '悟', '悪', '感', '我', '擊', '教', '方', 
+           '旅', '日', '春', '書', '月', '朋', '望', '木', '本', '村', '条', '杭', '杰', '東', 
+           '林', '柊', '柑', '桑', '森', '椰', '業', '樵', '樹', '正', '武', '歩', '死', '氏', 
+           '民', '永', '汤', '沙', '沢', '治', '法', '泰', '津', '海', '清', '渡', '湖', '源', 
+           '潛', '澎', '澤', '濱', '灣', '点', '焉', '無', '爱', '爷', '物', '犯', '狐', '狙', 
+           '狸', '王', '珊', '琪', '瑜', '瑞', '生', '田', '由', '甲', '町', '畑', '的', '真', 
+           '神', '祭', '穂', '空', '筒', '紀', '綿', '織', '美', '老', '者', '聖', '興', '良', 
+           '花', '芳', '草', '菜', '萩', '葵', '蓝', '薈', '藍', '號', '蛇', '行', '衛', '裕', 
+           '襄', '西', '語', '談', '諜', '識', '讃', '识', '谷', '貴', '賢', '贝', '超', '足', 
+           '轩', '逆', '遊', '達', '邦', '郎', '部', '里', '野', '金', '鈴', '鉄', '鎮', '間', 
+           '防', '陆', '陸', '陽', '隊', '雄', '雪', '青', '響', '首', '香', '馮', '駿', '马', 
+           '髪', '魔', '魚', '鹅', '麻', '黃', '黄', '龍', '龙', '북', '촌', 'fl', '\ufeff',]
+
+# Run clean_text, then delete every character listed in `replace` in one translate pass per plot (was one column pass per character).
+movies_df['Plot'] = movies_df['Plot'].apply(clean_text)
+strip_table = str.maketrans(dict.fromkeys(replace))  # char -> None means "delete"; raises ValueError if an entry is not a single character
+movies_df['Plot'] = movies_df['Plot'].apply(lambda x: x.translate(strip_table))
+# Fit mlb on a single sample that lists all main genres (presumably so its classes are exactly main_genres - confirm).
+mlb.fit([main_genres])
+
+# Upsampling
+# Count the movies flagged with each main genre (encoded_genres[genre] == 1) into
+# genre_count_table and track the largest count in highest_movie_count, which is
+# the sample size every genre is resampled to further down.
+genre_count_table = pd.DataFrame(columns=['Genre', 'MovieCount'])
+highest_movie_count = 0
+for i, genre in enumerate(main_genres):
+    movie_count = movies_df[encoded_genres[genre] == 1].shape[0]  # filter once per genre instead of three times
+    genre_count_table.loc[i, 'Genre'] = genre
+    genre_count_table.loc[i, 'MovieCount'] = movie_count
+    highest_movie_count = max(highest_movie_count, movie_count)
+
+# Build one frame per main genre, each brought up to highest_movie_count rows, then stack them into upsampled_df.
+resampled = []
+for genre in main_genres:
+    df = movies_df[encoded_genres[genre] == 1]
+    df_upsample = df
+    # Genres already at the largest count are kept as-is; the rest are sampled with replacement.
+    if len(df) != highest_movie_count:
+        print("Resampling {}: [{}] to {} samples".format(genre, len(df), highest_movie_count))
+        df_upsample = resample(df, replace=True, n_samples=highest_movie_count)
+    resampled.append(df_upsample)
+
+upsampled_df = pd.concat(resampled)
+# Split the upsampled data via get_train_test (test_size=0.1, presumably a 10% hold-out) and fit a one-vs-rest logistic regression.
+movies = upsampled_df.copy()
+optimal_val_split_lr = 0.1
+# NOTE(review): unpacking order is x_train, y_train, x_test, y_test - confirm it matches what get_train_test returns.
+movies_train, movies_test, x_train_lr, y_train_lr, x_test_lr, y_test_lr = get_train_test(upsampled_df, test_size=optimal_val_split_lr)
+lr_classifier = LogisticRegression(max_iter=2000)
+classifier = OneVsRestClassifier(lr_classifier)
+classifier.fit(x_train_lr, y_train_lr)
+# Pickle the fitted objects themselves under the file names app.py loads; the old loop pickled the name strings ('mlb', ...) instead.
+# NOTE(review): assumes `vectorizer` is the module-level vectorizer fitted earlier in this script; `classifier` is saved as model.pickle.
+for name, fitted in (('mlb', mlb), ('vectorizer', vectorizer), ('model', classifier)):
+  with open(f'{name}.pickle', 'wb') as handle:
+    pickle.dump(fitted, handle)
+
+# Automatically restart the services. TODO: may need to change this to push to GitHub and then SSH into the server to restart.
+
+# print('Restarting services')
+# os.chdir("~/Dev/coursework")
+# os.system("forever restart;cd NodeAPI;pm2 restart /NodeAPI/index.js")
+# print('Restarted services')
+# Stage, commit and push the regenerated pickles; the exit status of each os.system call is ignored.
+print('Pushing trained pickle models...')
+for git_command in ("git add mlb.pickle vectorizer.pickle model.pickle",
+                    "git commit -m \"Update re-trained models\"",
+                    "git push -u origin master"):
+    os.system(git_command)
diff --git a/data/.gitkeep b/data/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/data/Data.csv b/data/Data.csv
new file mode 100644
index 0000000000000000000000000000000000000000..fd67c9bc1014c7e86ad0ac4f753f2ffd53b1521e
Binary files /dev/null and b/data/Data.csv differ
diff --git a/data/test.txt b/data/test.txt
deleted file mode 100644
index 524acfffa760fd0b8c1de7cf001f8dd348b399d8..0000000000000000000000000000000000000000
--- a/data/test.txt
+++ /dev/null
@@ -1 +0,0 @@
-Test file
diff --git a/mlb.pickle b/mlb.pickle
index de75a1db7de12d7f07fef6d54f72d35f7c5eed71..6a74ded4d09524c9ba37f45765efa03d06f32ca5 100644
Binary files a/mlb.pickle and b/mlb.pickle differ
diff --git a/model.pickle b/model.pickle
index 85c8237522740a4c73a3244bded7a692e37e5e91..7482bf47b53d5482ff76918cca53894ed08f4782 100644
Binary files a/model.pickle and b/model.pickle differ
diff --git a/push_updates.sh b/push_updates.sh
deleted file mode 100644
index 258b40351fdebec507ee09a5b6d26f69c27f03de..0000000000000000000000000000000000000000
--- a/push_updates.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-ehco "Pushing updates to repo..."
diff --git a/update_model.sh b/update_model.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2dd62e4943016c4e08664ebf9a3682f9e4979d3a
--- /dev/null
+++ b/update_model.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e  # stop if the pull fails, so a failed update is not reported as a restart
+echo "Pulling updates..."
+git pull
+echo "Restarting server..."
+pm2 restart all
diff --git a/vectorizer.pickle b/vectorizer.pickle
index 9fb289c932bdb6aec603b028b1d59ad4e5fa3796..bc3bcee39a706c7887dd62eb6d0ece7bec214ebd 100644
Binary files a/vectorizer.pickle and b/vectorizer.pickle differ