diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index daaf1f0779728a5c9acec5d9e21239d91589c9cb..0000000000000000000000000000000000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,18 +0,0 @@ -image: python:latest -services: - - docker:dind - -update-data: - stage: build - tags: - - updating - script: - - python -V - - pip install numpy - - pip install pandas - - pip install sklearn - - pip install nltk - - python update.py - only: - changes: - - data/* diff --git a/NodeAPI/index.js b/NodeAPI/index.js index d821d539c846fc8073b11e70c16c0df0fef0728d..7c861f843072907422a60807be040499d7d4e4a2 100644 --- a/NodeAPI/index.js +++ b/NodeAPI/index.js @@ -45,19 +45,38 @@ app.post('/predict', (req,res) => { // Get the data to be stored var plot = req.body.plot; - var prediction = JSON.parse(body).predictionText; + var prediction = JSON.parse(JSON.parse(body).predictionText); // Get rcord to be inserted var rec = {'Plot':plot, 'Prediction': prediction}; // Insert the record to the collection, throw error if insert unsuccessful collection.insert(rec, {w:1}, function(err, result) {}); + }, function(){ + db.close(); }); res.render('predictions',{predictions:JSON.parse(JSON.parse(body).predictionText)}) }); -}) - - + console.log(req.body.plot) + // res.render('index', {}); + +}); + +app.get('/submissions', function (req, res) { + var resultArray = []; + MongoClient.connect(url, function(err, db) { + if(err) { return console.dir(err); } + + var cursor = db.db('ClusterNLP').collection('Pred_Records').find(); + cursor.forEach(function(doc, err){ + resultArray.push(doc) + }, function(){ + db.close(); + res.render('submissions', {items: resultArray}); + }); + }); + +}); \ No newline at end of file diff --git a/NodeAPI/public/css/style.css b/NodeAPI/public/css/style.css index b6f0933e70578175e6e9a0ca960a2965d9db1a58..d6d7f62bb632b1dab57a57dfe3a77c5a625b0b05 100644 --- a/NodeAPI/public/css/style.css +++ b/NodeAPI/public/css/style.css @@ -71,7 +71,8 @@ fieldset { -ms-transition: all 0.1s ease-in-out; -o-transition: all 0.1s ease-in-out; transition: all 0.1s ease-in-out; - color: black; + color: #444; + margin-top: 4%; } .ghost-button:hover { border:2px solid #e95e01; @@ -86,7 +87,6 @@ p { width: 100%; height: 100%; color: white; - display: grid; background-size: 300% 300%; /* background-image: linear-gradient( -90deg, @@ -132,4 +132,67 @@ p { border-color: #007299; color: #007299; box-shadow: none; +} + +/* Add a black background color to the top navigation */ +.topnav { + background-color: cornflowerblue; + overflow: hidden; +} + +/* Style the links inside the navigation bar */ +.topnav a { + float: left; + color: #f2f2f2; + text-align: center; + padding: 14px 16px; + font-size: 1.3em; + text-decoration: none; + font-family: Calibri, 'Trebuchet MS', sans-serif; + font-weight: bold; +} + +/* Change the color of links on hover */ +.topnav a:hover { + background-color: #ddd; + color: #e95e01; + text-decoration: none; +} + +/* Add a color to the active/current link */ +.topnav a.active { + background-color: #e95e01; + color: white; +} + +.title{ + color: cornflowerblue; + font-size: 2.2em; + margin-top: 5%; +} + +.subtitle{ + color: #e95e01; + font-size: 1.2em; +} + +table, th, td { + border: 1px solid black; + color: black; +} + +td{ + min-width: 10em; + font-family: Calibri, 'Trebuchet MS', sans-serif; +} + +th{ + color: #e95e01; + font-size: 1.2em; +} + +.thead{ + font-family: Calibri, 'Trebuchet MS', sans-serif; + color: cornflowerblue; + font-size: 2em; } \ No newline at end of file diff --git a/NodeAPI/views/index.ejs b/NodeAPI/views/index.ejs index 0ad075c2845356d49185883ecfe0dcf95f107886..fa58fa4e0362edc7f631f0807842e34b1dc3d046 100644 --- a/NodeAPI/views/index.ejs +++ b/NodeAPI/views/index.ejs @@ -13,10 +13,14 @@ </head> <div class="bg"> + <div class="topnav"> + <a class="active" href="/">NLP11</a> + <a href="/submissions">Submissions</a> + </div> <body class="text-center" > <div class="row"> - <h1 style="color: cornflowerblue;">NLP Group 11: Predict movie genres by plot</h1> - <b style="color: #e95e01; font-size: large;">Enter a movie plot text to receive a prediction of the grenre(s)</b> <br> + <h1 class="title">NLP Group 11: Predict movie genres by plot</h1> + <b class="subtitle">Enter a movie plot text to receive a prediction of the grenre(s)</b> <br> <!-- <h2>Enter plot text</h2> --> <!-- <div>Any Address holding a balance of SIMP is able allow the backend to burn their tokens. <b>MAKE SURE METAMASK BROWSER EXTENSION IS CONNECTED TO THE WEBSITE (TOP RIGHT)</b></div> --> diff --git a/NodeAPI/views/predictions.ejs b/NodeAPI/views/predictions.ejs index 8dda37800fffb9b47351b575e04c95c2d265d9eb..bc90bf58425b560a406aa84cee3aeb42217d711e 100644 --- a/NodeAPI/views/predictions.ejs +++ b/NodeAPI/views/predictions.ejs @@ -15,6 +15,10 @@ </head> <div class="bg"> + <div class="topnav"> + <a href="/">NLP11</a> + <a href="/submissions">Submissions</a> + </div> <body class="text-center" > <div class="row"> <h1 style="color: cornflowerblue;">Predicted Genres</h1> diff --git a/NodeAPI/views/submissions.ejs b/NodeAPI/views/submissions.ejs new file mode 100644 index 0000000000000000000000000000000000000000..8e95f03f46d5d2bf22a8138d77eea0e8e71bd352 --- /dev/null +++ b/NodeAPI/views/submissions.ejs @@ -0,0 +1,28 @@ +<!DOCTYPE html> +<html> + <head> + <title>NLP Demo</title> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <link rel="stylesheet" type="text/css" href="/css/style.css"> + <link href='https://fonts.googleapis.com/css?family=Open+Sans:300' rel='stylesheet' type='text/css'> + <!-- Latest compiled and minified CSS --> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css"> + <!-- jQuery library --> + <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script> + </head> + + <div class="bg"> + <div class="topnav"> + <a href="/">NLP11</a> + <a class="active" href="/submissions">Submissions</a> + </div> + <body> + + <h1 class = "thead text-center">Movie Submissions and Predictions</h1> + <%- include("table", {items}); %> + + </body> + </div> +</html> + diff --git a/NodeAPI/views/table.ejs b/NodeAPI/views/table.ejs new file mode 100644 index 0000000000000000000000000000000000000000..5f802e5eda4a0027ee8de6c05a86c502903b8c85 --- /dev/null +++ b/NodeAPI/views/table.ejs @@ -0,0 +1,17 @@ +<table class="table"> + <tr> + <th>Prediction</th> + <th>Plot</th> + </tr> + <%if (items.length >0){ %> + <% items.forEach(function(row) { %> + <tr> + <td><%= row.Prediction %></td> + <td><%= row.Plot %></td> + </tr> + <% })}else{ %> + <tr> + <td colspan="6">No Record Found</td> + </tr> + <% } %> + </table> \ No newline at end of file diff --git a/app.py b/app.py index 2169d4f0f1bae4f8c94e08a9fa8a60624127319d..aa51c0b8859484a0e370c67dea3be8267fc027aa 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,7 @@ import numpy as np from flask import Flask, request, render_template import pickle +import re import json app = Flask(__name__) #Initialize the flask App @@ -9,9 +10,67 @@ model = pickle.load(open('model.pickle', 'rb')) mlb = pickle.load(open('mlb.pickle','rb')) vectorizer = pickle.load(open('vectorizer.pickle','rb')) +replace = [':', ';', '<', '=', '>', '?', '@', '\\', '_', '`', + '\n', '\r', '#', '$', '%', '&', "'", '*', '+', '-', '{', '|', '}', + '\xa0', '¢', '£', 'Â¥', '«', '°', '´', '»', '¼', '½', '×', 'ß', 'à ', 'á', 'â', + 'ã', 'ä', 'Ã¥', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'Ã', 'î', 'ï', 'ð', 'ñ', + 'ò', 'ó', 'ô', 'ö', '÷', 'ø', 'ù', 'ú', 'û', 'ü', 'Ä', 'ă', 'ć', 'Ä', 'Ä‘', + 'Ä“', 'Ä—', 'Ä›', 'ÄŸ', 'Ä¡', 'ħ', 'Ä©', 'Ä«', 'ı', 'ĺ', 'Å‚', 'Å„', 'Å', 'Å“', 'Å›', + 'ÅŸ', 'Å¡', 'Å£', 'Å©', 'Å«', 'Å', 'ź', 'ż', 'ž', 'Æ¡', 'ư', 'Ç”', 'È™', 'È›', 'É', + 'É”', 'É™', 'É¡', 'ɪ', 'ɾ', 'ʃ', 'ÊŠ', 'ʲ', 'Ê»', 'ʼ', 'ˈ', 'Ë', '̇', 'μ', 'Ï€', + 'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п', + 'Ñ€', 'Ñ', 'Ñ‚', 'у', 'Ñ…', 'ч', 'ш', 'Ñ‹', 'ÑŒ', 'Ñ', 'Ñ‘', 'Ñ’', 'ј', 'Ñš', '×”', + 'ו', '×™', 'ך', 'ל', 'מ', 'ש', 'ं', 'अ', 'आ', 'उ', 'क', 'ग', 'च', 'ज', 'ट', + 'ठ', 'ण', 'त', 'द', 'न', 'प', 'फ', 'ब', 'à¤', 'म', 'य', 'र', 'ल', 'व', 'ष', + 'स', 'ह', 'ा', 'ि', 'ी', 'à¥', 'ू', 'ृ', 'े', 'ै', 'ो', 'à¥', 'ক', 'ঠ', 'ত', 'থ', + 'দ', 'ধ', 'ন', 'ব', 'র', 'শ', 'ষ', 'স', 'া', 'ি', 'à§€', 'à§', 'ে', 'à§', 'ế', 'á»…', + 'ệ', 'á»™', '\u2009', '\u200a', '\u200b', '\u200c', '\u200d', 'â€', '–', '—', '―', + '‘', '’', '‚', '“', 'â€', '…', '′', '″', 'â„', '₤', '€', '₱', '₹', 'â…“', 'â…ž', 'â†', + '−', '♥', '\u3000', 'ã‚', 'ã„', 'ã†', 'ãŠ', 'ã‹', 'ãŒ', 'ã', 'ã', 'ã‘', 'ã’', 'ã“', + 'ã—', 'ã™', 'ã›', 'ã«', 'ã®', 'ã»', 'ã¾', 'ã¿', 'ã‚‚', 'ã‚„', 'ã‚…', 'よ', 'り', 'ã‚“', + 'ã‚¢', 'イ', 'ã‚«', 'ガ', 'ã‚', 'ã‚®', 'ク', 'ケ', 'ã‚´', 'サ', 'ã‚¶', 'ã‚·', 'ジ', 'ス', + 'ズ', 'ソ', 'ゾ', 'ã‚¿', 'ダ', 'ッ', 'ツ', 'デ', 'ト', 'ド', 'ニ', 'ヌ', 'ãƒ', 'ノ', + 'ãƒ', 'パ', 'フ', 'ブ', 'プ', 'ペ', 'ボ', 'ム', 'メ', 'ャ', 'ヤ', 'ュ', 'ラ', 'リ', + 'ル', 'ãƒ', 'ワ', 'ン', 'ヶ', '・', 'ー', 'ㄜ', '一', '七', '三', '世', 'ä¸', '丸', + '丹', '举', 'ä¹…', '之', '也', '予', '二', '井', '京', '人', 'ä»', '介', 'ä¼½', '俊', + 'ä¿¡', 'å…ƒ', 'å…„', 'å…ˆ', 'å…‰', 'å…š', 'å…¬', 'å…', '冯', '刀', '刃', '刘', 'åŠ', '力', + '勇', 'å‹™', '化', 'å', 'åƒ', '原', 'å‹', 'å¸', 'åˆ', 'å', 'å‘', 'å›', 'å³', '命', + 'å’²', 'å››', '団', '図', '国', 'åœ', '城', '域', 'å¡”', '士', '外', '多', '夢', '大', + '天', '夫', '奪', '女', 'å§', '婆', '婉', 'å', 'å™', 'å¦', 'å«', '宇', '安', 'å®—', + 'å®™', 'å®®', 'å®¶', 'å°', 'å°š', 'å°¾', 'å±±', 'å³¶', 'å·ž', 'å·«', '布', '师', '師', '府', + 'åº', 'å¼', 'å¼', 'å¼ ', 'å¾·', '怪', 'æµ', '悟', '悪', '感', '我', '擊', 'æ•™', 'æ–¹', + 'æ—…', 'æ—¥', '春', '書', '月', '朋', '望', '木', '本', 'æ‘', 'æ¡', 'æ', 'æ°', 'æ±', + 'æž—', '柊', '柑', 'æ¡‘', '森', '椰', 'æ¥', '樵', '樹', 'æ£', 'æ¦', 'æ©', 'æ»', 'æ°', + 'æ°‘', 'æ°¸', '汤', 'æ²™', 'æ²¢', 'æ²»', '法', 'æ³°', 'æ´¥', 'æµ·', '清', '渡', 'æ¹–', 'æº', + 'æ½›', '澎', '澤', '濱', 'ç£', '点', '焉', 'ç„¡', '爱', '爷', '物', '犯', 'ç‹', 'ç‹™', + '狸', '王', 'çŠ', 'çª', '瑜', '瑞', '生', 'ç”°', 'ç”±', '甲', '町', 'ç•‘', 'çš„', '真', + '神', 'ç¥', 'ç©‚', '空', 'ç’', 'ç´€', 'ç¶¿', 'ç¹”', '美', 'è€', '者', 'è–', '興', '良', + '花', '芳', 'è‰', 'èœ', 'è©', '葵', 'è“', 'è–ˆ', 'è—', '號', '蛇', '行', 'è¡›', '裕', + '襄', '西', '語', '談', '諜', 'è˜', '讃', '识', 'è°·', 'è²´', 'è³¢', 'è´', 'è¶…', 'è¶³', + '轩', '逆', 'éŠ', 'é”', '邦', '郎', '部', '里', '野', '金', '鈴', '鉄', '鎮', 'é–“', + '防', '陆', '陸', '陽', '隊', '雄', '雪', 'é’', '響', '首', '香', '馮', 'é§¿', '马', + '髪', 'é”', 'éš', 'é¹…', '麻', '黃', '黄', 'é¾', 'é¾™', 'ë¶', 'ì´Œ', 'fl', '\ufeff',] + +def preProcess(text): + text = text.lower() + text = re.sub(r"what's", "what is ", text) + text = re.sub(r"\'s", " ", text) + text = re.sub(r"\'ve", " have ", text) + text = re.sub(r"can't", "can not ", text) + text = re.sub(r"n't", " not ", text) + text = re.sub(r"i'm", "i am ", text) + text = re.sub(r"\'re", " are ", text) + text = re.sub(r"\'d", " would ", text) + text = re.sub(r"\'ll", " will ", text) + text = re.sub(r"\'scuse", " excuse ", text) + text = text.strip(' ') + for character in replace: + text = text.replace(character,"") + return text + @app.route('/predict',methods=['POST']) def predict(): - reqInput = request.form.get('text') + reqInput = preProcess(request.form.get('text')) vector = vectorizer.transform([reqInput]) prediction = model.predict(vector) output = json.dumps(mlb.inverse_transform(prediction)[0]) diff --git a/mlb.pkl b/mlb.pkl deleted file mode 100644 index b17176ce411b5b44cad65e0e6dccbd6841a06eab..0000000000000000000000000000000000000000 Binary files a/mlb.pkl and /dev/null differ diff --git a/model.pkl b/model.pkl deleted file mode 100644 index d72caa1773bd852c3795bd47830e83a3473932b1..0000000000000000000000000000000000000000 Binary files a/model.pkl and /dev/null differ