diff --git a/transfer_learning_attempt.ipynb b/transfer_learning_attempt.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..fc448b9bd33c23c7185902988dedded62f058c73
--- /dev/null
+++ b/transfer_learning_attempt.ipynb
@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb812f11-3c4b-4af2-91da-ce6062cbccfe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datasets, evaluate\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "from transformers import (\n",
+    "    AutoTokenizer,\n",
+    "    AutoModelForTokenClassification,\n",
+    "    DataCollatorForTokenClassification,\n",
+    "    TrainingArguments,\n",
+    "    Trainer,\n",
+    ")\n",
+    "\n",
+    "# SciBERT is pre-trained on scientific text, which suits the PLOD abbreviation data.\n",
+    "model_name = \"allenai/scibert_scivocab_uncased\"\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+    "# PLOD provides one tag per token, so this is token classification\n",
+    "# (not sequence classification) with four labels.\n",
+    "model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=4)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3edd79ea-0ca4-4543-8469-459f6d62603f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "CW_datasets = datasets.load_dataset(\"surrey-nlp/PLOD-CW\")\n",
+    "train_dataset = CW_datasets[\"train\"]\n",
+    "test_dataset = CW_datasets[\"test\"]\n",
+    "\n",
+    "label_encoding = {\"B-O\": 0, \"B-AC\": 1, \"B-LF\": 2, \"I-LF\": 3}\n",
+    "label_list = list(label_encoding.keys())\n",
+    "\n",
+    "metric = evaluate.load(\"seqeval\")\n",
+    "\n",
+    "def tokenize_and_align_labels(examples):\n",
+    "    tokenized_inputs = tokenizer(examples['tokens'], truncation=True, is_split_into_words=True)\n",
+    "\n",
+    "    labels = []\n",
+    "    for i, label in enumerate(examples['ner_tags']):\n",
+    "        word_ids = tokenized_inputs.word_ids(batch_index=i)\n",
+    "        previous_word_idx = None\n",
+    "        label_ids = []\n",
+    "        for word_idx in word_ids:\n",
+    "            if word_idx is None:\n",
+    "                # Special tokens ([CLS], [SEP]) are ignored by the loss.\n",
+    "                label_ids.append(-100)\n",
+    "            elif word_idx != previous_word_idx:\n",
+    "                # Label only the first sub-token of each word.\n",
+    "                label_ids.append(label_encoding[label[word_idx]])\n",
+    "            else:\n",
+    "                label_ids.append(-100)\n",
+    "            previous_word_idx = word_idx\n",
+    "        labels.append(label_ids)\n",
+    "\n",
+    "    tokenized_inputs[\"labels\"] = labels\n",
+    "    return tokenized_inputs\n",
+    "\n",
+    "def compute_metrics(eval_preds):\n",
+    "    # Standard seqeval pattern: drop -100 positions, map ids back to tag strings.\n",
+    "    logits, labels = eval_preds\n",
+    "    predictions = np.argmax(logits, axis=-1)\n",
+    "    true_predictions = [\n",
+    "        [label_list[p] for p, l in zip(prediction, label) if l != -100]\n",
+    "        for prediction, label in zip(predictions, labels)\n",
+    "    ]\n",
+    "    true_labels = [\n",
+    "        [label_list[l] for p, l in zip(prediction, label) if l != -100]\n",
+    "        for prediction, label in zip(predictions, labels)\n",
+    "    ]\n",
+    "    results = metric.compute(predictions=true_predictions, references=true_labels)\n",
+    "    return {\n",
+    "        \"precision\": results[\"overall_precision\"],\n",
+    "        \"recall\": results[\"overall_recall\"],\n",
+    "        \"f1\": results[\"overall_f1\"],\n",
+    "        \"accuracy\": results[\"overall_accuracy\"],\n",
+    "    }\n",
+    "\n",
+    "tokenized_datasets = CW_datasets.map(tokenize_and_align_labels, batched=True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24833ca6-b7f7-457a-9964-fbcbd71e1cb6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "training_args = TrainingArguments(\n",
+    "    output_dir='./results',\n",
+    "    num_train_epochs=3,\n",
+    "    per_device_train_batch_size=8,\n",
+    "    per_device_eval_batch_size=8,\n",
+    "    warmup_steps=500,\n",
+    "    weight_decay=0.01,\n",
+    "    logging_dir='./logs',\n",
+    "    logging_steps=10,\n",
+    "    evaluation_strategy='epoch',  # report the seqeval scores once per epoch\n",
+    ")\n",
+    "\n",
+    "# Pads inputs and labels together; the default collator cannot handle\n",
+    "# the variable-length label sequences produced above.\n",
+    "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)\n",
+    "\n",
+    "trainer = Trainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=tokenized_datasets['train'],\n",
+    "    eval_dataset=tokenized_datasets['test'],\n",
+    "    data_collator=data_collator,\n",
+    "    compute_metrics=compute_metrics,\n",
+    ")\n",
+    "\n",
+    "trainer.train()\n"
+   ]
+  },
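+  {
+   "cell_type": "markdown",
+   "id": "inference-demo-note",
+   "metadata": {},
+   "source": [
+    "Quick sanity check (an added sketch, not part of the original pipeline): tag one\n",
+    "test sentence with the fine-tuned model. The choice of `test_dataset[0]` and the\n",
+    "first-sub-token decoding are illustrative assumptions.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "inference-demo",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative only: any test sentence would do here.\n",
+    "sample = test_dataset[0]\n",
+    "encoded = tokenizer(sample[\"tokens\"], truncation=True, is_split_into_words=True, return_tensors=\"pt\")\n",
+    "with torch.no_grad():\n",
+    "    logits = model(**{k: v.to(model.device) for k, v in encoded.items()}).logits\n",
+    "pred_ids = logits.argmax(dim=-1)[0].tolist()\n",
+    "\n",
+    "# Print one tag per word (its first sub-token), mirroring the label alignment above.\n",
+    "previous_word_idx = None\n",
+    "for word_idx, pred_id in zip(encoded.word_ids(), pred_ids):\n",
+    "    if word_idx is not None and word_idx != previous_word_idx:\n",
+    "        print(sample[\"tokens\"][word_idx], \"->\", label_list[pred_id])\n",
+    "    previous_word_idx = word_idx\n"
+   ]
+  },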
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b773f9e-31ac-4e04-86d1-f0d558ee6b0f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datasets import Dataset\n",
+    "\n",
+    "def update_model_with_new_data(new_texts, new_labels):\n",
+    "    # Prepare the new data in the same format as PLOD-CW.\n",
+    "    new_data = {\"tokens\": new_texts, \"ner_tags\": new_labels}\n",
+    "    new_dataset = Dataset.from_dict(new_data)\n",
+    "    tokenized_new_dataset = new_dataset.map(tokenize_and_align_labels, batched=True)\n",
+    "\n",
+    "    # Fine-tune the current model on the new data.\n",
+    "    trainer = Trainer(\n",
+    "        model=model,\n",
+    "        args=training_args,\n",
+    "        train_dataset=tokenized_new_dataset,\n",
+    "        data_collator=data_collator,\n",
+    "    )\n",
+    "    trainer.train()\n",
+    "\n",
+    "    # Save the updated model and tokenizer.\n",
+    "    saved_model_path = \"./path_to_your_saved_model\"\n",
+    "    model.save_pretrained(saved_model_path)\n",
+    "    tokenizer.save_pretrained(saved_model_path)\n"
+   ]
+  },
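+  {
+   "cell_type": "markdown",
+   "id": "update-demo-note",
+   "metadata": {},
+   "source": [
+    "Minimal usage sketch for `update_model_with_new_data` (added for illustration):\n",
+    "the sentence and tags below are invented placeholders in the PLOD scheme, not\n",
+    "real annotations.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "update-demo",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical example: one made-up sentence with an abbreviation (B-AC)\n",
+    "# and its long form (B-LF/I-LF); everything else is B-O.\n",
+    "new_texts = [\n",
+    "    [\"EGFR\", \"stands\", \"for\", \"epidermal\", \"growth\", \"factor\", \"receptor\", \".\"],\n",
+    "]\n",
+    "new_labels = [\n",
+    "    [\"B-AC\", \"B-O\", \"B-O\", \"B-LF\", \"I-LF\", \"I-LF\", \"I-LF\", \"B-O\"],\n",
+    "]\n",
+    "update_model_with_new_data(new_texts, new_labels)\n"
+   ]
+  }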
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "",
+   "name": ""
+  },
+  "language_info": {
+   "name": ""
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}