def create_label_list(dataset, label_encoding):
    """Encode every sample's string NER tags as integer ids.

    Args:
        dataset: Mapping with a ``"ner_tags"`` entry holding one sequence of
            tag strings per sample.
        label_encoding: Mapping from tag string to integer id.

    Returns:
        A list containing one list of integer ids per sample.

    Raises:
        KeyError: If a tag is not present in ``label_encoding``.
    """
    return [[label_encoding[tag] for tag in sample] for sample in dataset["ner_tags"]]


def turn_dict_to_list_of_dict(d):
    """Convert a column-oriented batch into a list of per-sample dicts.

    Only the ``"input_ids"`` and ``"labels"`` columns are carried over; any
    other column in ``d`` is dropped.

    Args:
        d: Mapping with parallel ``"labels"`` and ``"input_ids"`` sequences.

    Returns:
        A list of ``{"input_ids": ..., "labels": ...}`` dicts, one per sample.
    """
    return [
        {"input_ids": input_ids, "labels": labels}
        for labels, input_ids in zip(d["labels"], d["input_ids"])
    ]


def tokenize_and_align_labels(tokenizer, dataset, label_list, max_length=500, truncation=True, is_split_into_words=True):
    """Tokenize pre-split words and expand word-level labels to sub-tokens.

    Every sub-token receives the label of the word it came from; positions
    that belong to no word (``word_ids`` reports ``None``) are labelled
    ``-100``.

    Args:
        tokenizer: Callable tokenizer whose result supports
            ``word_ids(batch_index=...)`` and item assignment.
        dataset: Mapping with a ``"tokens"`` entry (one word list per sample).
        label_list: One list of integer label ids per sample, aligned with
            the words of that sample.
        max_length: Forwarded to the tokenizer.
        truncation: Forwarded to the tokenizer.
        is_split_into_words: Forwarded to the tokenizer.

    Returns:
        The tokenizer output with an added ``"labels"`` entry.
    """
    tokenized_inputs = tokenizer(
        dataset["tokens"],
        max_length=max_length,
        truncation=truncation,
        is_split_into_words=is_split_into_words,
    )

    labels = []
    for i, labels_per_sample in enumerate(label_list):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        # First and continuation sub-tokens are labelled alike, so a single
        # expression covers both cases.
        labels.append([
            -100 if word_idx is None else labels_per_sample[word_idx]
            for word_idx in word_ids
        ])

    tokenized_inputs["labels"] = labels
    return tokenized_inputs


# Tag names indexed by label id; used to decode ids in compute_metrics.
label_list = ["B-O", "B-AC", "B-LF", "I-LF"]


def compute_metrics(p):
    """Compute overall precision, recall, F1 and accuracy for an eval pass.

    Relies on the module-level ``metric`` object (loaded elsewhere in the
    notebook) and on ``label_list`` to turn ids back into tag strings.

    Args:
        p: A ``(predictions, labels)`` pair. ``predictions`` holds per-class
            scores and is arg-maxed over axis 2; ``labels`` holds gold ids,
            with ``-100`` marking positions to ignore.

    Returns:
        Dict with the keys ``precision``, ``recall``, ``f1`` and ``accuracy``.
    """
    scores, labels = p
    predictions = np.argmax(scores, axis=2)

    true_predictions, true_labels = [], []
    for pred_row, gold_row in zip(predictions, labels):
        # Keep only the positions that carry a real gold label.
        kept = [(pred, gold) for pred, gold in zip(pred_row, gold_row) if gold != -100]
        true_predictions.append([label_list[pred] for pred, _ in kept])
        true_labels.append([label_list[gold] for _, gold in kept])

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }


def predict_tags(tokens, tokenizer, model, label_encoding, max_length=512, default_label="B-O"):
    """Predict one NER tag per input word.

    Args:
        tokens: List of pre-split words.
        tokenizer: Tokenizer whose output supports ``word_ids`` (the same
            capability ``tokenize_and_align_labels`` relies on).
        model: Token-classification model returning an object with
            ``.logits``.
        label_encoding: Mapping from tag string to integer id.
        max_length: Maximum sequence length passed to the tokenizer.
        default_label: Tag given to words that received no prediction, e.g.
            words cut off by truncation.

    Returns:
        A list of ``(word, tag)`` tuples, one per entry of ``tokens``.
    """
    if not tokens:
        return []

    # A single sequence needs no padding, so none is requested.
    inputs = tokenizer(
        tokens,
        is_split_into_words=True,
        return_tensors="pt",
        max_length=max_length,
        truncation=True,
    )

    # Run on whatever device the model lives on. After Trainer fine-tuning
    # the model may sit on an accelerator (e.g. MPS); CPU inputs then fail.
    device = getattr(model, "device", None)
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    model.eval()  # disable dropout so predictions are deterministic
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    predicted_ids = torch.argmax(outputs.logits, dim=2)[0].tolist()
    reversed_label_encoding = {v: k for k, v in label_encoding.items()}

    # Predictions are per sub-token (special tokens included). Map them back
    # to words via word_ids, keeping the first sub-token's prediction.
    word_labels = {}
    for word_idx, label_id in zip(inputs.word_ids(batch_index=0), predicted_ids):
        if word_idx is not None and word_idx not in word_labels:
            word_labels[word_idx] = reversed_label_encoding[label_id]

    return [(token, word_labels.get(i, default_label)) for i, token in enumerate(tokens)]


def save_results(inputs, ner_pred, filename='log_file.txt'):
    """Append one prediction record to the log file as a JSON line.

    Args:
        inputs: The words that were tagged.
        ner_pred: The predicted tags.
        filename: Path of the log file; created if it does not exist.
    """
    results = {
        'inputs': inputs,
        'ner_predictions': ner_pred,
        'timestamp': datetime.now().isoformat(),
    }
    with open(filename, 'a', encoding='utf-8') as file:
        file.write(json.dumps(results) + '\n')


def load_results(filename='log_file.txt'):
    """Read every prediction record written by ``save_results``.

    Args:
        filename: Path of the JSON-lines log file.

    Returns:
        A list of record dicts in file order; empty when the file does not
        exist yet (nothing has been logged).
    """
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            # Blank lines carry no record and would make json.loads fail.
            return [json.loads(line) for line in file if line.strip()]
    except FileNotFoundError:
        return []
def split_string(s):
    """Split text into word tokens and single non-space symbols.

    Runs of word characters become one token each; every other
    non-whitespace character becomes its own token.
    """
    return re.findall(r"\b\w+\b|\S", s)


# Beginning of Flask endpoint code

app = Flask(__name__)


def _json_body():
    """Return the request's JSON object, or ``{}`` when absent or malformed."""
    body = request.get_json(silent=True)
    return body if isinstance(body, dict) else {}


# Example command to run:
# curl -s -H "Content-Type: application/json" -X POST -d '{"input": "Hello World!"}' localhost:8080/test
@app.route('/test', methods=['POST'])
def test():
    """Test endpoint for Q3: echo the posted ``input`` string back.

    Returns:
        JSON ``{"output": ...}`` on success, or a 400 JSON error when
        ``input`` is missing or not a string.
    """
    inputs = _json_body().get('input')
    if not isinstance(inputs, str):
        return jsonify(error="'input' must be a string"), 400
    output = "Test successful, this was your input: " + inputs
    return jsonify(output=output)


# Can be run with:
# curl localhost:8080/use-pretrained
@app.route('/use-pretrained', methods=['GET'])
def use_pretrained():
    """Endpoint to load and use a pre-trained model.

    Loads the tokenizer and model from ``SciBERT-finetuned-NER`` into the
    module-level ``loaded_tokenizer`` / ``loaded_model`` used by ``/predict``.

    Returns:
        JSON success message, or a 500 JSON error if loading fails.
    """
    global loaded_tokenizer, loaded_model
    try:
        tokenizer = AutoTokenizer.from_pretrained("SciBERT-finetuned-NER")
        model = AutoModelForTokenClassification.from_pretrained("SciBERT-finetuned-NER")
    except Exception as e:
        app.logger.exception("Loading the pre-trained model failed")
        return jsonify(error=str(e)), 500

    # Publish both objects only once both loaded, so they never mismatch.
    loaded_tokenizer, loaded_model = tokenizer, model
    return jsonify(success="Pre-trained model loaded successfully")


# curl -X POST -H "Content-Type: application/json" -d "{\"model_name\": \"trainModelTest\", \"training_size\": 400}" http://127.0.0.1:8080/train
@app.route('/train', methods=['POST'])
def train():
    """Fine-tune SciBERT on a subset of PLOD-CW and make it the served model.

    Expects a JSON body with ``model_name`` (used as a directory name) and
    ``training_size`` (number of training samples; one fifth as many
    validation samples are used).

    Returns:
        JSON with a success message and the final validation F1; a 400 JSON
        error for invalid parameters; a 500 JSON error if the dataset is not
        loaded or training fails.
    """
    global loaded_model, loaded_tokenizer

    data = _json_body()
    model_name = data.get('model_name')
    training_size = data.get('training_size')

    # model_name ends up in filesystem paths: allow a single safe path
    # component only (no separators, no leading dot).
    if not isinstance(model_name, str) or not re.fullmatch(r"[A-Za-z0-9_-][A-Za-z0-9._-]*", model_name):
        return jsonify(error="'model_name' must contain only letters, digits, '.', '_' or '-'"), 400
    if isinstance(training_size, bool) or not isinstance(training_size, int) or training_size < 1:
        return jsonify(error="'training_size' must be a positive integer"), 400

    if 'CW_datasets' not in globals():
        return jsonify(error="Dataset not loaded"), 500

    try:
        # Select the subsets used for training and validation.
        train_split = CW_datasets["train"][:training_size]
        val_split = CW_datasets["validation"][:max(1, training_size // 5)]

        # Initialize the tokenizer and model for training using SciBERT.
        scibert_model_name = "allenai/scibert_scivocab_uncased"
        tokenizer = AutoTokenizer.from_pretrained(scibert_model_name)
        model = AutoModelForTokenClassification.from_pretrained(scibert_model_name, num_labels=4)

        # Prepare labels and tokenize datasets.
        tag_to_id = {"B-O": 0, "B-AC": 1, "B-LF": 2, "I-LF": 3}
        train_label_ids = create_label_list(train_split, tag_to_id)
        val_label_ids = create_label_list(val_split, tag_to_id)

        tokenised_train = turn_dict_to_list_of_dict(
            tokenize_and_align_labels(tokenizer, train_split, train_label_ids))
        tokenised_val = turn_dict_to_list_of_dict(
            tokenize_and_align_labels(tokenizer, val_split, val_label_ids))

        data_collator = DataCollatorForTokenClassification(tokenizer)

        # Step counts must be >= 1, and load_best_model_at_end needs
        # save_steps to be a round multiple of eval_steps.
        eval_steps = max(1, training_size // 10)
        checkpoint_steps = 2 * eval_steps

        # `evaluation_strategy` is deprecated in favour of `eval_strategy`;
        # use whichever name the installed transformers version declares.
        strategy_arg = (
            "eval_strategy"
            if "eval_strategy" in getattr(TrainingArguments, "__dataclass_fields__", {})
            else "evaluation_strategy"
        )

        training_args = TrainingArguments(
            output_dir=f'./results/{model_name}',
            eval_steps=eval_steps,
            learning_rate=2e-5,
            per_device_train_batch_size=2,
            per_device_eval_batch_size=1,
            num_train_epochs=1,
            weight_decay=0.01,
            save_steps=checkpoint_steps,
            metric_for_best_model='f1',
            logging_dir=f'./logs/{model_name}',
            logging_steps=checkpoint_steps,
            load_best_model_at_end=True,
            **{strategy_arg: 'steps'},
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenised_train,
            eval_dataset=tokenised_val,
            data_collator=data_collator,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
        )

        trainer.train()

        # Evaluate on the validation set while the model is still on the
        # Trainer's device.
        final_metrics = trainer.evaluate()
        f1_score = final_metrics.get('eval_f1')

        # Serve from CPU, as /use-pretrained does: the Trainer may have moved
        # the model to an accelerator (e.g. MPS), and CPU inputs then fail.
        model.to("cpu")

        # Save, then update the global references.
        model.save_pretrained(f'./custom_trained_model/{model_name}')
        tokenizer.save_pretrained(f'./custom_trained_model/{model_name}')
        loaded_model = model
        loaded_tokenizer = tokenizer

        return jsonify(success=f"Model '{model_name}' trained successfully", f1_score=f1_score)
    except Exception as e:
        app.logger.exception("Training failed")
        return jsonify(error=str(e)), 500


## /train or /use-pretrained must be run before this
## run from command line with: curl -s -H "Content-Type: application/json" -X POST -d '{"input": <text>}' localhost:8080/predict
## examples:
## curl -s -H "Content-Type: application/json" -X POST -d '{"input": "For this purpose the Gothenburg Young Persons Empowerment Scale (GYPES) was developed."}' localhost:8080/predict
## curl -s -H "Content-Type: application/json" -X POST -d '{"input": "Recent work by us and others suggest that the host’s heat shock protein 90 (Hsp90) chaperone can modulate the evolutionary paths traversed by viruses [18, 19]."}' localhost:8080/predict
@app.route('/predict', methods=['POST'])
def predict():
    """Tag the posted ``input`` text and log the result.

    Returns:
        JSON ``{"predictions": "<str of (word, tag) pairs>"}``; a 400 JSON
        error when ``input`` is missing or empty; a 409 JSON error when no
        model has been loaded; a 500 JSON error if inference fails.
    """
    inputs = _json_body().get('input')
    if not isinstance(inputs, str) or not inputs.strip():
        return jsonify(error="'input' must be a non-empty string"), 400

    # The globals only exist after /use-pretrained or /train has succeeded.
    tokenizer = globals().get('loaded_tokenizer')
    model = globals().get('loaded_model')
    if tokenizer is None or model is None:
        return jsonify(error="No model loaded; call /use-pretrained or /train first"), 409

    converted_inputs = split_string(inputs)
    try:
        predictions = predict_tags(converted_inputs, tokenizer, model, label_encoding)
    except Exception as e:
        app.logger.exception("Prediction failed")
        return jsonify(error=str(e)), 500

    ner_tags = [tag for _, tag in predictions]
    save_results(converted_inputs, ner_tags)
    # The stringified form is kept so existing clients see the same payload.
    return jsonify(predictions=str(predictions))


## Example
## curl localhost:8080/read_logs
@app.route('/read_logs')
def read_logs():
    """Return every logged prediction record as a JSON array.

    An empty array is returned when nothing has been logged yet.
    """
    try:
        logs = load_results()
    except FileNotFoundError:
        logs = []
    return jsonify(logs)
If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n", + "Some weights of BertForTokenClassification were not initialized from the model checkpoint at allenai/scibert_scivocab_uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/transformers/training_args.py:1474: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d77dd376727c4a36a1ee1a534bcac5fe", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/200 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a664a7603f5e47489d54148ea0df322a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/80 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'eval_loss': 0.2954792082309723, 'eval_precision': 0.9192034139402561, 'eval_recall': 0.9145202377582791, 'eval_f1': 0.9168558456299659, 'eval_accuracy': 0.8985082578582845, 'eval_runtime': 14.9365, 'eval_samples_per_second': 5.356, 'eval_steps_per_second': 5.356, 'epoch': 0.2}\n", + "{'loss': 0.4214, 'grad_norm': 5.093618869781494, 'learning_rate': 1.2e-05, 'epoch': 0.4}\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fa9a911837844e33a6bf1da8d0a78685", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/80 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "{'eval_loss': 0.2313002347946167, 'eval_precision': 0.9325906883747501, 'eval_recall': 0.9241437871497311, 'eval_f1': 0.9283480238839921, 'eval_accuracy': 0.9211507725093234, 'eval_runtime': 1.7865, 'eval_samples_per_second': 44.78, 'eval_steps_per_second': 44.78, 'epoch': 0.4}\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dd3cbb9f87c6455f8ce0dfbe87a65e24", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/80 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'eval_loss': 0.19281569123268127, 'eval_precision': 0.9394714407502132, 'eval_recall': 0.9357486555335409, 'eval_f1': 0.9376063528077142, 'eval_accuracy': 0.933404368673415, 'eval_runtime': 1.7433, 'eval_samples_per_second': 45.889, 'eval_steps_per_second': 45.889, 'epoch': 0.6}\n", + "{'loss': 0.2374, 'grad_norm': 2.5805234909057617, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.8}\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7f4d46a1e57d4e8f9c3c3c39a39ca2ed", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/80 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'eval_loss': 0.20157472789287567, 'eval_precision': 0.9435206422018348, 'eval_recall': 0.9315029719784885, 'eval_f1': 0.9374732944025067, 'eval_accuracy': 0.9331379861481087, 'eval_runtime': 1.7554, 'eval_samples_per_second': 45.575, 'eval_steps_per_second': 45.575, 'epoch': 0.8}\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3071f54c2c48447980968e8c85ec0440", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/80 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "{'eval_loss': 0.18856896460056305, 'eval_precision': 0.941643059490085, 'eval_recall': 0.9408434757996037, 'eval_f1': 0.9412430978337817, 'eval_accuracy': 0.9363345764517848, 'eval_runtime': 1.8332, 'eval_samples_per_second': 43.64, 'eval_steps_per_second': 43.64, 'epoch': 1.0}\n", + "{'train_runtime': 97.3226, 'train_samples_per_second': 4.11, 'train_steps_per_second': 2.055, 'train_loss': 0.3157902050018311, 'epoch': 1.0}\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "53f66fbb204045529e17c1adca818fc9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/80 [00:00<?, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "127.0.0.1 - - [23/May/2024 13:02:52] \"POST /train HTTP/1.1\" 200 -\n", + "[2024-05-23 13:03:15,257] ERROR in app: Exception on /predict [POST]\n", + "Traceback (most recent call last):\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/flask/app.py\", line 2529, in wsgi_app\n", + " response = self.full_dispatch_request()\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/flask/app.py\", line 1825, in full_dispatch_request\n", + " rv = self.handle_user_exception(e)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/flask/app.py\", line 1823, in full_dispatch_request\n", + " rv = self.dispatch_request()\n", + " ^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/flask/app.py\", line 1799, in dispatch_request\n", + " return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/var/folders/r3/rgdzj4k90y31832__9r63w9w0000gn/T/ipykernel_20301/1553366013.py\", 
line 10, in predict\n", + " predictions = predict_tags(converted_inputs, loaded_tokenizer, loaded_model, label_encoding)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/var/folders/r3/rgdzj4k90y31832__9r63w9w0000gn/T/ipykernel_20301/3237725419.py\", line 9, in predict_tags\n", + " outputs = model(input_ids, attention_mask=attention_mask)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py\", line 1885, in forward\n", + " outputs = self.bert(\n", + " ^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py\", line 1073, in forward\n", + " embedding_output = self.embeddings(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File 
\"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py\", line 210, in forward\n", + " inputs_embeds = self.word_embeddings(input_ids)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/sparse.py\", line 163, in forward\n", + " return F.embedding(\n", + " ^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/functional.py\", line 2237, in embedding\n", + " return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "RuntimeError: Placeholder storage has not been allocated on MPS device!\n", + "127.0.0.1 - - [23/May/2024 13:03:15] \"POST /predict HTTP/1.1\" 500 -\n", + "[2024-05-23 13:03:22,677] ERROR in app: Exception on /predict [POST]\n", + "Traceback (most recent call last):\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/flask/app.py\", line 2529, in wsgi_app\n", + " response = self.full_dispatch_request()\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/flask/app.py\", line 1825, in full_dispatch_request\n", + " rv = self.handle_user_exception(e)\n", + " 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/flask/app.py\", line 1823, in full_dispatch_request\n", + " rv = self.dispatch_request()\n", + " ^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/flask/app.py\", line 1799, in dispatch_request\n", + " return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/var/folders/r3/rgdzj4k90y31832__9r63w9w0000gn/T/ipykernel_20301/1553366013.py\", line 10, in predict\n", + " predictions = predict_tags(converted_inputs, loaded_tokenizer, loaded_model, label_encoding)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/var/folders/r3/rgdzj4k90y31832__9r63w9w0000gn/T/ipykernel_20301/3237725419.py\", line 9, in predict_tags\n", + " outputs = model(input_ids, attention_mask=attention_mask)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py\", line 1885, in forward\n", + " outputs = self.bert(\n", + " ^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", 
line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py\", line 1073, in forward\n", + " embedding_output = self.embeddings(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py\", line 210, in forward\n", + " inputs_embeds = self.word_embeddings(input_ids)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1511, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1520, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/modules/sparse.py\", line 163, in forward\n", + " return F.embedding(\n", + " ^^^^^^^^^^^^\n", + " File \"/Users/lukemenezes/anaconda3/lib/python3.11/site-packages/torch/nn/functional.py\", line 2237, in embedding\n", + " return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "RuntimeError: Placeholder storage has not been allocated on MPS device!\n", + "127.0.0.1 - - [23/May/2024 
def main():
    """Start the Flask development server for this app on the local machine.

    The server listens on 127.0.0.1:8080 and blocks until interrupted.
    """
    # Must be run with debug=False
    # (presumably because the debug reloader does not work from a notebook
    # kernel; confirm before changing).
    app.run(host='127.0.0.1', port=8080, debug=False)


if __name__ == '__main__':
    # Entry point for running on the local machine.
    main()