From 3e845c1f08f8440cfee4f00bd84ecfb51787d513 Mon Sep 17 00:00:00 2001 From: fw00355 <fw00355@surrey.ac.uk> Date: Wed, 22 May 2024 14:20:53 +0100 Subject: [PATCH] Changes before submission --- .runner_system_id | 1 - README.md | 10 + app/.runner_system_id | 1 - confusion_matrix.csv | 6 - fredRNN.ipynb | 1156 --------------------------------- predictions_with_accuracy.csv | 33 - 6 files changed, 10 insertions(+), 1197 deletions(-) delete mode 100644 .runner_system_id delete mode 100644 app/.runner_system_id delete mode 100644 confusion_matrix.csv delete mode 100644 fredRNN.ipynb delete mode 100644 predictions_with_accuracy.csv diff --git a/.runner_system_id b/.runner_system_id deleted file mode 100644 index e69e2f9..0000000 --- a/.runner_system_id +++ /dev/null @@ -1 +0,0 @@ -s_f301446f19c0 \ No newline at end of file diff --git a/README.md b/README.md index 96143db..6732b9f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,13 @@ # COM3029 CW Open app/app.ipynb to use the group work application. + + +.gitlab-ci.yml is needed for CI/CD pipelines, though setting up GitLab Runner is also needed. 
+ +Requirements: +Docker +NVIDIA GPU + + +This project was only tested on Windows machines diff --git a/app/.runner_system_id b/app/.runner_system_id deleted file mode 100644 index e69e2f9..0000000 --- a/app/.runner_system_id +++ /dev/null @@ -1 +0,0 @@ -s_f301446f19c0 \ No newline at end of file diff --git a/confusion_matrix.csv b/confusion_matrix.csv deleted file mode 100644 index 0fafe03..0000000 --- a/confusion_matrix.csv +++ /dev/null @@ -1,6 +0,0 @@ -,<pad>,B-O,B-AC,B-LF,I-LF -<pad>,1549,0,0,0,0 -B-O,0,22,0,32,2 -B-AC,0,0,8,23,0 -B-LF,0,6,7,1010,13 -I-LF,0,1,0,27,20 diff --git a/fredRNN.ipynb b/fredRNN.ipynb deleted file mode 100644 index 41005bc..0000000 --- a/fredRNN.ipynb +++ /dev/null @@ -1,1156 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import torch\n", - "import torchtext\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "import numpy as np\n", - "import random\n", - "from datasets import load_dataset\n", - "torch.backends.cudnn.deterministic = True\n", - "DEVICE = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "import pandas as pd\n", - "import gc\n", - "from matplotlib import pyplot as plt\n", - "from tqdm import tqdm\n", - "from datasets import load_metric\n", - "gc.collect()\n", - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "from torchtext.data.functional import to_map_style_dataset\n", - "dataset = load_dataset(\"surrey-nlp/PLOD-CW\")\n", - "# This might take a while\n", - "\n", - "\n", - "\n", - "train_data = to_map_style_dataset(dataset['train'])\n", - "val_data = to_map_style_dataset(dataset['validation'])\n", - "test_data = to_map_style_dataset(dataset['test'])" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "Number of training examples: 1072\n", - "Number of validation examples: 126\n", - "Number of testing examples: 153\n", - "{'tokens': ['Abbreviations', ':', 'GEMS', ',', 'Global', 'Enteric', 'Multicenter', 'Study', ';', 'VIP', ',', 'ventilated', 'improved', 'pit', '.'], 'pos_tags': ['NOUN', 'PUNCT', 'PROPN', 'PUNCT', 'PROPN', 'PROPN', 'PROPN', 'PROPN', 'PUNCT', 'PROPN', 'PUNCT', 'VERB', 'ADJ', 'NOUN', 'PUNCT'], 'ner_tags': ['B-O', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O']}\n" - ] - } - ], - "source": [ - "print(f'Number of training examples: {len(train_data)}')\n", - "print(f'Number of validation examples: {len(val_data)}')\n", - "print(f'Number of testing examples: {len(test_data)}')\n", - "print(test_data[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Unique tokens in text vocabulary: 9135\n", - "Unique tokens in POS vocabulary: 19\n", - "Unique tokens in label vocabulary: 4\n", - "['<unk>', '<pad>', ',', '(', ')', 'the', '.', 'of', 'and', '-']\n", - "{'B-O': 0, 'B-AC': 1, 'B-LF': 2, 'I-LF': 3}\n", - "['<unk>', '<pad>', 'NOUN', 'PUNCT', 'PROPN', 'ADP', 'ADJ', 'VERB', 'DET', 'NUM']\n" - ] - } - ], - "source": [ - "from torchtext.vocab import build_vocab_from_iterator, vocab\n", - "from torchtext.data.utils import get_tokenizer\n", - "from collections import OrderedDict\n", - "\n", - "MAX_VOCAB_SIZE = 100_000\n", - "\n", - "def _process_texts_for_vocab(data):\n", - " for item in data:\n", - " yield item['tokens']\n", - "def _process_pos_for_vocab(data):\n", - " for item in data:\n", - " yield item['pos_tags']\n", - "\n", - "text_vocab = build_vocab_from_iterator(_process_texts_for_vocab(train_data), specials=('<unk>', '<pad>'), max_tokens=MAX_VOCAB_SIZE)\n", - "pos_vocab = 
build_vocab_from_iterator(_process_pos_for_vocab(train_data), specials=('<unk>', '<pad>'), min_freq=1)\n", - "\n", - "diction = OrderedDict([(\"<pad>\", 0),(\"B-O\",1),(\"B-AC\",2),(\"B-LF\",3),(\"I-LF\",4)])\n", - "label_vocab = vocab(diction)\n", - "text_vocab.set_default_index(text_vocab[\"<unk>\"])\n", - "pos_vocab.set_default_index(text_vocab[\"<unk>\"])\n", - "label_vocab.set_default_index(0)\n", - "print(f\"Unique tokens in text vocabulary: {len(text_vocab)}\")\n", - "print(f\"Unique tokens in POS vocabulary: {len(pos_vocab)}\")\n", - "print(f\"Unique tokens in label vocabulary: {len(label_vocab)}\")\n", - "\n", - "\n", - "print(text_vocab.get_itos()[:10])\n", - "print(label_vocab.get_stoi())\n", - "print(pos_vocab.get_itos()[:10])" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(',', 2268), ('(', 1583), (')', 1575), ('the', 1136), ('.', 1073), ('of', 995), ('and', 903), ('-', 901), (';', 691), ('in', 585), ('to', 449), (']', 367), ('[', 358), ('a', 346), ('with', 297), ('for', 221), ('were', 201), ('was', 194), (':', 180), ('by', 178)]\n" - ] - } - ], - "source": [ - "from collections import Counter\n", - "\n", - "\n", - "counter = Counter()\n", - "for data_point in train_data:\n", - " tokens = data_point['tokens']\n", - " counter.update(tokens)\n", - "most_common_tokens = counter.most_common(20)\n", - "print(most_common_tokens)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "from torch.utils.data import Dataset, DataLoader\n", - "from torch.nn.utils.rnn import pad_sequence\n", - "class NERDataset(Dataset):\n", - " def __init__(self, data, text_vocab, tag_vocab):\n", - " self.data = data\n", - " self.text_vocab = text_vocab\n", - " self.tag_vocab = tag_vocab\n", - " \n", - " def __len__(self):\n", - " return len(self.data)\n", - " \n", - " def __getitem__(self, idx):\n", - " 
tokens = self.data[idx]['tokens']\n", - " ner_tags = self.data[idx]['ner_tags']\n", - " text_numerical = [self.text_vocab[token] for token in tokens]\n", - " tag_numerical = [self.tag_vocab[tag] for tag in ner_tags]\n", - " \n", - " return torch.tensor(text_numerical, dtype=torch.long), torch.tensor(tag_numerical, dtype=torch.long)\n", - " \n", - "def collate_fn(batch):\n", - " tokens, tags = zip(*batch)\n", - " tokens_padded = pad_sequence(tokens, batch_first=True, padding_value=text_vocab['<pad>'])\n", - " tags_padded = pad_sequence(tags, batch_first=True, padding_value=label_vocab['<pad>']) # Assuming you have a pad token in your label vocab\n", - "\n", - " return tokens_padded, tags_padded\n", - "\n", - "# Create instances of the NERDataset for training, validation, and test data\n", - "train_dataset = NERDataset(train_data, text_vocab, label_vocab)\n", - "val_dataset = NERDataset(val_data, text_vocab, label_vocab)\n", - "test_dataset = NERDataset(test_data, text_vocab, label_vocab)\n", - "\n", - "# Create DataLoaders\n", - "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)\n", - "val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)\n", - "test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch.Size([32, 100]) torch.Size([32, 100])\n" - ] - } - ], - "source": [ - "from torch.utils.data import Dataset, DataLoader\n", - "from torch.nn.utils.rnn import pad_sequence\n", - "class NERDatasetPOS(Dataset):\n", - " def __init__(self, data, text_vocab, pos_vocab, tag_vocab):\n", - " self.data = data\n", - " self.text_vocab = text_vocab\n", - " self.pos_vocab = pos_vocab\n", - " self.tag_vocab = tag_vocab\n", - "\n", - " def __len__(self):\n", - " return len(self.data)\n", - "\n", - " def 
__getitem__(self, idx):\n", - " tokens = self.data[idx]['tokens']\n", - " pos_tags = self.data[idx]['pos_tags']\n", - " ner_tags = self.data[idx]['ner_tags']\n", - " \n", - " # Concatenate token and POS tag\n", - " tokens_with_pos = ['{}_{}'.format(token, pos) for token, pos in zip(tokens, pos_tags)]\n", - "\n", - " text_numerical = [self.text_vocab.get(token_with_pos, self.text_vocab['<unk>']) for token_with_pos in tokens_with_pos]\n", - " tag_numerical = [self.tag_vocab[tag] for tag in ner_tags]\n", - "\n", - " return torch.tensor(text_numerical, dtype=torch.long), torch.tensor(tag_numerical, dtype=torch.long)\n", - " \n", - "def collate_fn(batch):\n", - " tokens, tags = zip(*batch)\n", - " tokens_padded = pad_sequence(tokens, batch_first=True, padding_value=text_vocab['<pad>'])\n", - " tags_padded = pad_sequence(tags, batch_first=True, padding_value=label_vocab['<pad>']) # Assuming you have a pad token in your label vocab\n", - "\n", - " return tokens_padded, tags_padded\n", - "\n", - "train_dataset_pos = NERDatasetPOS(train_data, text_vocab, pos_vocab, label_vocab)\n", - "val_dataset_pos = NERDatasetPOS(val_data, text_vocab, pos_vocab, label_vocab)\n", - "test_dataset_pos = NERDatasetPOS(test_data, text_vocab, pos_vocab, label_vocab)\n", - "\n", - "# Create DataLoaders as before\n", - "train_loader_pos = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)\n", - "val_loader_pos = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)\n", - "test_loader_pos = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)\n", - "for batch in train_loader_pos:\n", - " print(batch[0].shape, batch[1].shape)\n", - " break" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "class RNN(nn.Module):\n", - " def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout):\n", - " super().__init__()\n", - " self.embedding = 
nn.Embedding(vocab_size, embedding_dim)\n", - " self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)\n", - " self.fc = nn.Linear(hidden_dim, output_dim)\n", - " self.dropout = nn.Dropout(dropout)\n", - "\n", - " def forward(self, text):\n", - " # text = [batch size, sent len]\n", - " embedded = self.dropout(self.embedding(text))\n", - " # embedded = [batch size, sent len, emb dim]\n", - " outputs, _ = self.rnn(embedded)\n", - " # outputs = [batch size, sent len, hid dim]\n", - " predictions = self.fc(self.dropout(outputs))\n", - " # predictions = [batch size, sent len, output dim]\n", - " return predictions\n" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "def train(model, iterator, optimizer, criterion, device):\n", - " model.train()\n", - " epoch_loss = 0\n", - " \n", - " for batch in iterator:\n", - " text, tags = batch\n", - " text, tags = text.to(device), tags.to(device)\n", - " \n", - " optimizer.zero_grad()\n", - " predictions = model(text)\n", - " \n", - " # Reshape for calculating loss\n", - " predictions = predictions.view(-1, predictions.shape[-1])\n", - " tags = tags.view(-1)\n", - " \n", - " loss = criterion(predictions, tags)\n", - " loss.backward()\n", - " optimizer.step()\n", - " epoch_loss += loss.item()\n", - " \n", - " return epoch_loss / len(iterator)\n", - "def evaluate(model, iterator, criterion, device):\n", - " model.eval()\n", - " epoch_loss = 0\n", - " \n", - " with torch.no_grad():\n", - " for batch in iterator:\n", - " text, tags = batch\n", - " text, tags = text.to(device), tags.to(device)\n", - " predictions = model(text)\n", - " predictions = predictions.view(-1, predictions.shape[-1])\n", - " tags = tags.view(-1)\n", - " loss = criterion(predictions, tags)\n", - " epoch_loss += loss.item()\n", - " \n", - " return epoch_loss / len(iterator)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "THIS IS THE BASE TRAINING CODE, THE MODEL 
PARAMETERS ARE WHAT WERE FOUND VIA TRIAL AND ERROR AS A DECENT STARTING POINT. THEY ARE THE DEFAULT PARAMETERS FOR MY EXPERIMENTATION\n", - "INPUT_DIM = len(text_vocab)\n", - "EMBEDDING_DIM = 4096\n", - "HIDDEN_DIM = 128\n", - "OUTPUT_DIM = len(label_vocab)\n", - "DROPOUT = 0.8\n", - "\n", - "Training: 100%|██████████| 100/100 [02:04<00:00, 1.25s/it, Epoch 100/100, Train Loss: 0.0928, Val Loss: 0.1511]\n", - "\n", - "CPU times: total: 2min 4s\n", - "\n", - "Wall time: 2min 5s" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\Main\\anaconda3\\envs\\COM3029\\lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", - " _torch_pytree._register_pytree_node(\n", - "Training: 100%|██████████| 100/100 [01:52<00:00, 1.13s/it, Epoch 100/100, Train Loss: 0.0928, Val Loss: 0.1875]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "saved: BASE/4096dim_128hidden_100epochs_08drop.pth\n", - "CPU times: total: 1min 53s\n", - "Wall time: 1min 55s\n" - ] - } - ], - "source": [ - "%%time\n", - "# Model parameters\n", - "INPUT_DIM = len(text_vocab)\n", - "EMBEDDING_DIM = 4096\n", - "HIDDEN_DIM = 128\n", - "OUTPUT_DIM = len(label_vocab)\n", - "DROPOUT = 0.8\n", - "\n", - "# Instantiate the model with the corrected RNNforNER class parameters\n", - "model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, DROPOUT)\n", - "model = model.to(DEVICE)\n", - "\n", - "# Optimizer and loss function\n", - "optimizer = optim.Adam(model.parameters())\n", - "# Ensure you have a padding index for your labels. 
Replace '<pad>' with your actual padding token index if necessary\n", - "criterion = nn.CrossEntropyLoss() #ignore_index=label_vocab['<pad>']\n", - "criterion = criterion.to(DEVICE)\n", - "\n", - "train_loss_tracking = []\n", - "val_loss_tracking = []\n", - "# Training loop\n", - "N_EPOCHS = 100\n", - "pbar = tqdm(range(N_EPOCHS),desc=\"Training\")\n", - "for epoch in pbar:\n", - " train_loss = train(model, train_loader, optimizer, criterion, DEVICE)\n", - " valid_loss = evaluate(model, val_loader, criterion, DEVICE)\n", - " train_loss_tracking.append(train_loss)\n", - " val_loss_tracking.append(valid_loss)\n", - " pbar.set_postfix_str(f'Epoch {epoch+1}/{N_EPOCHS}, Train Loss: {train_loss:.4f}, Val Loss: {valid_loss:.4f}')\n", - "model_name = \"{0}dim_{1}hidden_{2}epochs_{3}drop.pth\".format(EMBEDDING_DIM,HIDDEN_DIM,N_EPOCHS,str(DROPOUT).replace(\".\",\"\"))\n", - "torch.save(model.state_dict(),model_name)\n", - "print(\"saved: \",model_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "THIS IS THE CODE FOR TRAINING THE POS_TAGGED MODEL\n", - "\n", - "\n", - "Training: 100%|██████████| 100/100 [02:08<00:00, 1.28s/it, Epoch 100/100, Train Loss: 0.0958, Val Loss: 0.1714]\n", - "\n", - "\n", - "saved: 4096dim_128hidden_100epochs_08drop_POS_tagged.pth\n", - "\n", - "\n", - "\n", - "CPU times: total: 2min 7s\n", - "\n", - "\n", - "Wall time: 2min 8s\n", - "\n", - "\n", - "This has slightly worse Validation loss, however, there is a smaller gap between validation loss and training loss, suggesting a reduction in overfitting" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Training: 32%|███■| 32/100 [00:38<01:23, 1.23s/it, Epoch 32/100, Train Loss: 0.1715, Val Loss: 0.2057]" - ] - } - ], - "source": [ - "%%time\n", - "# Model parameters\n", - "INPUT_DIM = len(text_vocab)\n", - "EMBEDDING_DIM = 4096\n", - "HIDDEN_DIM = 128\n", - 
"OUTPUT_DIM = len(label_vocab)\n", - "DROPOUT = 0.8\n", - "\n", - "# Instantiate the model with the corrected RNNforNER class parameters\n", - "model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, DROPOUT)\n", - "model = model.to(DEVICE)\n", - "\n", - "# Optimizer and loss function\n", - "optimizer = optim.Adam(model.parameters())\n", - "# Ensure you have a padding index for your labels. Replace '<pad>' with your actual padding token index if necessary\n", - "criterion = nn.CrossEntropyLoss() #ignore_index=label_vocab['<pad>']\n", - "criterion = criterion.to(DEVICE)\n", - "\n", - "train_loss_tracking = []\n", - "val_loss_tracking = []\n", - "# Training loop\n", - "N_EPOCHS = 100\n", - "pbar = tqdm(range(N_EPOCHS),desc=\"Training\")\n", - "for epoch in pbar:\n", - " train_loss = train(model, train_loader_pos, optimizer, criterion, DEVICE)\n", - " valid_loss = evaluate(model, val_loader_pos, criterion, DEVICE)\n", - " train_loss_tracking.append(train_loss)\n", - " val_loss_tracking.append(valid_loss)\n", - " pbar.set_postfix_str(f'Epoch {epoch+1}/{N_EPOCHS}, Train Loss: {train_loss:.4f}, Val Loss: {valid_loss:.4f}')\n", - "model_name = \"{0}dim_{1}hidden_{2}epochs_{3}drop_POS_tagged.pth\".format(EMBEDDING_DIM,HIDDEN_DIM,N_EPOCHS,str(DROPOUT).replace(\".\",\"\"))\n", - "torch.save(model.state_dict(),model_name)\n", - "print(\"saved: \",model_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from sklearn.manifold import TSNE\n", - "import matplotlib.pyplot as plt\n", - "import gensim.downloader as api\n", - "word2vec_model = api.load('word2vec-google-news-300')" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch.Size([32, 93, 300]) torch.Size([32, 93])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"C:\\Users\\Main\\AppData\\Local\\Temp\\ipykernel_13756\\2545187020.py:17: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\torch\\csrc\\utils\\tensor_new.cpp:278.)\n", - " return torch.tensor(embeddings, dtype=torch.float), torch.tensor(tag_numerical, dtype=torch.long)\n" - ] - } - ], - "source": [ - "class Word2VecDataset(Dataset):\n", - " def __init__(self, data, word2vec_model, tag_vocab):\n", - " self.data = data\n", - " self.word2vec_model = word2vec_model\n", - " self.tag_vocab = tag_vocab\n", - " \n", - " def __len__(self):\n", - " return len(self.data)\n", - " \n", - " def __getitem__(self, idx):\n", - " tokens = self.data[idx]['tokens']\n", - " ner_tags = self.data[idx]['ner_tags']\n", - "\n", - " embeddings = [self.word2vec_model[token] if token in self.word2vec_model else np.zeros(300) for token in tokens]\n", - " tag_numerical = [self.tag_vocab[tag] for tag in ner_tags]\n", - " \n", - " return torch.tensor(embeddings, dtype=torch.float), torch.tensor(tag_numerical, dtype=torch.long)\n", - "def collate_fn(batch):\n", - " embeddings, tags = zip(*batch)\n", - " lengths = [len(seq) for seq in embeddings]\n", - " embeddings_padded = pad_sequence(embeddings, batch_first=True)\n", - " tags_padded = pad_sequence(tags, batch_first=True, padding_value=label_vocab['<pad>'])\n", - " lengths = torch.tensor(lengths, dtype=torch.long)\n", - " \n", - " return embeddings_padded, tags_padded\n", - "\n", - "train_dataset_w2v = Word2VecDataset(train_data, word2vec_model, label_vocab)\n", - "val_dataset_w2v = Word2VecDataset(val_data, word2vec_model, label_vocab)\n", - "test_dataset_w2v = Word2VecDataset(test_data, word2vec_model, label_vocab)\n", - "\n", - "\n", - "train_loader_w2v = DataLoader(train_dataset_w2v, batch_size=32, 
shuffle=True, collate_fn=collate_fn)\n", - "val_loader_w2v = DataLoader(val_dataset_w2v, batch_size=32, shuffle=False, collate_fn=collate_fn)\n", - "test_loader_w2v = DataLoader(test_dataset_w2v, batch_size=32, shuffle=False, collate_fn=collate_fn)\n", - "\n", - "# To check the shape of a batch\n", - "for embeddings_padded, tags_padded in train_loader_w2v:\n", - " print(embeddings_padded.shape, tags_padded.shape)\n", - " break" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "THIS IS THE CODE FOR TEXT2VEC USES DEFAULT PARAMETERS, CHANGES INPUT TO A WORD2VEC REPRESENTATION RATHER THAN VOCAB" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "class RNNW2V(nn.Module): #needs to rewrite this, since it is already embeded by word2vec\n", - " def __init__(self, embedding_dim, hidden_dim, output_dim, dropout):\n", - " super().__init__()\n", - " self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)\n", - " self.fc = nn.Linear(hidden_dim, output_dim)\n", - " self.dropout = nn.Dropout(dropout)\n", - "\n", - " def forward(self, embedded):\n", - " embedded = self.dropout(embedded)\n", - " outputs, _ = self.rnn(embedded)\n", - " predictions = self.fc(self.dropout(outputs))\n", - " return predictions\n" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Training: 0%| | 0/100 [00:00<?, ?it/s]" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Training: 100%|██████████| 100/100 [03:48<00:00, 2.29s/it, Epoch 100/100, Train Loss: 0.1972, Val Loss: 0.1396]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "saved: 300dim_128hidden_100epochs_08drop_w2v.pth\n", - "CPU times: total: 14min 55s\n", - "Wall time: 3min 48s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - 
"%%time\n", - "# Model parameters\n", - "EMBEDDING_DIM = 300 #this is the embedding dimension of gensim's word2vec model\n", - "HIDDEN_DIM = 128\n", - "OUTPUT_DIM = len(label_vocab)\n", - "DROPOUT = 0.8\n", - "\n", - "# Instantiate the model with the corrected RNNforNER class parameters\n", - "model = RNNW2V(EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, DROPOUT)\n", - "model = model.to(DEVICE)\n", - "\n", - "# Optimizer and loss function\n", - "optimizer = optim.Adam(model.parameters())\n", - "# Ensure you have a padding index for your labels. Replace '<pad>' with your actual padding token index if necessary\n", - "criterion = nn.CrossEntropyLoss() #ignore_index=label_vocab['<pad>']\n", - "criterion = criterion.to(DEVICE)\n", - "\n", - "train_loss_tracking = []\n", - "val_loss_tracking = []\n", - "# Training loop\n", - "N_EPOCHS = 100\n", - "pbar = tqdm(range(N_EPOCHS),desc=\"Training\")\n", - "for epoch in pbar:\n", - " train_loss = train(model, train_loader_w2v, optimizer, criterion, DEVICE)\n", - " valid_loss = evaluate(model, val_loader_w2v, criterion, DEVICE)\n", - " train_loss_tracking.append(train_loss)\n", - " val_loss_tracking.append(valid_loss)\n", - " pbar.set_postfix_str(f'Epoch {epoch+1}/{N_EPOCHS}, Train Loss: {train_loss:.4f}, Val Loss: {valid_loss:.4f}')\n", - "model_name = \"{0}dim_{1}hidden_{2}epochs_{3}drop_w2v.pth\".format(EMBEDDING_DIM,HIDDEN_DIM,N_EPOCHS,str(DROPOUT).replace(\".\",\"\"))\n", - "torch.save(model.state_dict(),model_name)\n", - "print(\"saved: \",model_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "api.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(train_loss_tracking, label='Training loss')\n", - "plt.plot(val_loss_tracking, label='Validation loss')\n", - "plt.legend()\n", - "plt.xlabel('Epoch')\n", - "plt.ylabel('Loss')\n", - "plt.show()\n" - ] - }, - { - "cell_type": "code", - 
"execution_count": 28, - "metadata": {}, - "outputs": [ - { - "ename": "ZeroDivisionError", - "evalue": "division by zero", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[28], line 99\u001b[0m\n\u001b[0;32m 95\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m accuracies\n\u001b[0;32m 98\u001b[0m \u001b[38;5;66;03m# Call this function after or during your training loop, passing in the label_vocab and text_vocab\u001b[39;00m\n\u001b[1;32m---> 99\u001b[0m predictions_df, confusion_matrix_df \u001b[38;5;241m=\u001b[39m \u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_loader\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mDEVICE\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel_vocab\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtext_vocab\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 100\u001b[0m \u001b[38;5;28mprint\u001b[39m(confusion_matrix_df\u001b[38;5;241m.\u001b[39mhead())\n\u001b[0;32m 101\u001b[0m predictions_df\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredictions_with_accuracy_\u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;124m.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(model_name)\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.pth\u001b[39m\u001b[38;5;124m\"\u001b[39m,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m),index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", - "Cell \u001b[1;32mIn[28], line 34\u001b[0m, in \u001b[0;36mpredict\u001b[1;34m(model, iterator, device, label_vocab, text_vocab)\u001b[0m\n\u001b[0;32m 31\u001b[0m true_tags \u001b[38;5;241m=\u001b[39m 
[[label_vocab\u001b[38;5;241m.\u001b[39mget_itos()[index] \u001b[38;5;28;01mfor\u001b[39;00m index \u001b[38;5;129;01min\u001b[39;00m sentence] \u001b[38;5;28;01mfor\u001b[39;00m sentence \u001b[38;5;129;01min\u001b[39;00m tags]\n\u001b[0;32m 33\u001b[0m \u001b[38;5;66;03m# Compute accuracy for this batch\u001b[39;00m\n\u001b[1;32m---> 34\u001b[0m batch_accuracy_noPad \u001b[38;5;241m=\u001b[39m \u001b[43mcompute_accuracy_modified\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpredictions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtags\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 35\u001b[0m batch_accuracy \u001b[38;5;241m=\u001b[39m compute_accuracy(predictions, tags)\n\u001b[0;32m 36\u001b[0m accuracy_list\u001b[38;5;241m.\u001b[39mextend(batch_accuracy_noPad)\n", - "Cell \u001b[1;32mIn[28], line 93\u001b[0m, in \u001b[0;36mcompute_accuracy_modified\u001b[1;34m(predictions, tags)\u001b[0m\n\u001b[0;32m 91\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m p\u001b[38;5;241m==\u001b[39mt \u001b[38;5;129;01mand\u001b[39;00m p\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m 92\u001b[0m count \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m---> 93\u001b[0m accuracy \u001b[38;5;241m=\u001b[39m \u001b[43mcorrect\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtrue\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43mcount\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(true) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m 94\u001b[0m accuracies\u001b[38;5;241m.\u001b[39mappend(accuracy)\n\u001b[0;32m 95\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m accuracies\n", - "\u001b[1;31mZeroDivisionError\u001b[0m: division by zero" - ] 
- } - ], - "source": [ - "import pandas as pd\n", - "import torch\n", - "from sklearn.metrics import confusion_matrix, f1_score\n", - "\n", - "def predict(model, iterator, device, label_vocab, text_vocab):\n", - " model.eval()\n", - " \n", - " text_list = []\n", - " true_labels_list = []\n", - " predicted_labels_list = []\n", - " accuracy_list = []\n", - " all_true_labels = []\n", - " all_predicted_labels = []\n", - " accuracy_list_pad = []\n", - "\n", - " with torch.no_grad():\n", - " for batch in iterator:\n", - " text, tags = batch\n", - " text, tags = text.to(device), tags.to(device)\n", - " predictions = model(text)\n", - " \n", - " # Convert predictions to labels\n", - " predictions = predictions.argmax(dim=2)\n", - " \n", - " # Convert tensors to lists for easier handling\n", - " predictions = predictions.tolist()\n", - " tags = tags.tolist()\n", - " \n", - " # Convert indices to strings (NER tags)\n", - " predictions_tags = [[label_vocab.get_itos()[index] for index in sentence] for sentence in predictions]\n", - " true_tags = [[label_vocab.get_itos()[index] for index in sentence] for sentence in tags]\n", - " \n", - " # Compute accuracy for this batch\n", - " batch_accuracy_noPad = compute_accuracy_modified(predictions, tags)\n", - " batch_accuracy = compute_accuracy(predictions, tags)\n", - " accuracy_list.extend(batch_accuracy_noPad)\n", - " accuracy_list_pad.extend(batch_accuracy)\n", - "\n", - " # Convert indices to text\n", - " text_tokens = [[text_vocab.get_itos()[index] for index in sentence] for sentence in text.tolist()]\n", - " \n", - " # Append to lists\n", - " for i in range(len(predictions_tags)): \n", - " text_list.append(text_tokens[i])\n", - " true_labels_list.append(true_tags[i])\n", - " predicted_labels_list.append(predictions_tags[i])\n", - " all_true_labels.extend(true_tags[i])\n", - " all_predicted_labels.extend(predictions_tags[i])\n", - "\n", - " break # Only show the first batch or part of it\n", - " \n", - " # Create DataFrame\n", - 
" df = pd.DataFrame({\n", - " 'Text': text_list, \n", - " 'True Labels': true_labels_list, \n", - " 'Predicted Labels': predicted_labels_list, \n", - " 'Accuracy (excl. Padding)': accuracy_list, \n", - " 'Accuracy (incl. Padding)': accuracy_list_pad\n", - " })\n", - " \n", - " \n", - " cm = confusion_matrix(all_true_labels, all_predicted_labels)\n", - " cm_df = pd.DataFrame(cm, index=label_vocab.get_itos(), columns=label_vocab.get_itos())\n", - " \n", - " non_pad_labels = [\"B-O\",\"B-AC\",\"B-LF\",\"I-LF\"] # Assuming 0 is <pad>\n", - " f1_no_pad = f1_score(all_true_labels, all_predicted_labels, labels=non_pad_labels, average='weighted')\n", - " f1_pad = f1_score(all_true_labels, all_predicted_labels, average='weighted')\n", - "\n", - "\n", - " df['F1 Score (excl. Padding)'] = f1_no_pad\n", - " df[\"F1 Score (inc. Padding)\"] = f1_pad\n", - " return df, cm_df\n", - "\n", - "def compute_accuracy(predictions, tags):\n", - " accuracies = []\n", - " for pred, true in zip(predictions, tags):\n", - " correct = sum(p == t for p, t in zip(pred, true))\n", - " accuracy = correct / len(true) if len(true) > 0 else 0\n", - " accuracies.append(accuracy)\n", - " return accuracies\n", - "\n", - "def compute_accuracy_modified(predictions, tags):\n", - " accuracies = []\n", - " for pred, true in zip(predictions, tags):\n", - " #correct = sum(p == t for p, t in zip(pred, true))\n", - " correct = 0\n", - " count = 0 \n", - " for p,t in zip(pred,true):\n", - " if p == t and p != 0: #0 == <pad>\n", - " correct += 1\n", - " elif p==t and p==0:\n", - " count += 1\n", - " accuracy = correct / (len(true)-count) if len(true) > 0 and len(true)-count != 0 else 0\n", - " accuracies.append(accuracy)\n", - " return accuracies\n", - "\n", - "\n", - "# Call this function after or during your training loop, passing in the label_vocab and text_vocab\n", - "predictions_df, confusion_matrix_df = predict(model, test_loader, DEVICE, label_vocab, text_vocab)\n", - "print(confusion_matrix_df.head())\n", 
- "predictions_df.to_csv(\"predictions_with_accuracy_{0}.csv\".format(model_name).replace(\".pth\",\"\"),index=False)\n", - "confusion_matrix_df.to_csv(\"confusion_matrix_{0}.csv\".format(model_name.replace(\".pth\",\"\")),index=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CODE FOR BERT" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\Main\\anaconda3\\envs\\COM3029\\lib\\site-packages\\transformers\\utils\\generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", - " _torch_pytree._register_pytree_node(\n", - "c:\\Users\\Main\\anaconda3\\envs\\COM3029\\lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", - " _torch_pytree._register_pytree_node(\n", - "c:\\Users\\Main\\anaconda3\\envs\\COM3029\\lib\\site-packages\\transformers\\utils\\generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\n", - " _torch_pytree._register_pytree_node(\n", - "Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (542 > 512). 
Running this sequence through the model will result in indexing errors\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['[CLS]', 'for', 'this', 'purpose', 'the', 'gothenburg', 'young', 'persons', 'empowerment', 'scale', '(', 'g', '##ype', '##s', ')', 'was', 'developed', '.', '[SEP]']\n" - ] - } - ], - "source": [ - "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", - "\n", - "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", - "model = AutoModelForTokenClassification.from_pretrained(\"bert-base-uncased\", num_labels=4)\n", - "short_dataset = dataset[\"train\"]\n", - "val_dataset = dataset[\"validation\"]\n", - "test_dataset = dataset[\"test\"]\n", - "tokenized_input = tokenizer(short_dataset[\"tokens\"], is_split_into_words=True)\n", - "\n", - "# Example single sentence example.\n", - "for token in tokenized_input[\"input_ids\"]:\n", - " print(tokenizer.convert_ids_to_tokens(token))\n", - " break\n", - "label_encoding = {\"B-O\": 0, \"B-AC\": 1, \"B-LF\": 2, \"I-LF\": 3}\n", - "\n", - "label_list = []\n", - "for sample in short_dataset[\"ner_tags\"]:\n", - " label_list.append([label_encoding[tag] for tag in sample])\n", - "\n", - "val_label_list = []\n", - "for sample in val_dataset[\"ner_tags\"]:\n", - " val_label_list.append([label_encoding[tag] for tag in sample])\n", - "\n", - "test_label_list = []\n", - "for sample in test_dataset[\"ner_tags\"]:\n", - " test_label_list.append([label_encoding[tag] for tag in sample])\n", - "\n", - "def tokenize_and_align_labels(short_dataset, list_name):\n", - " tokenized_inputs = tokenizer(short_dataset[\"tokens\"], truncation=True, is_split_into_words=True) ## For some models, you may need to set max_length to approximately 500.\n", - "\n", - " labels = []\n", - " for i, label in enumerate(list_name):\n", - " word_ids = tokenized_inputs.word_ids(batch_index=i)\n", - " previous_word_idx = None\n", - " label_ids = []\n", - " for word_idx in word_ids:\n", 
- " # Special tokens have a word id that is None. We set the label to -100 so they are automatically\n", - " # ignored in the loss function.\n", - " if word_idx is None:\n", - " label_ids.append(-100)\n", - " # We set the label for the first token of each word.\n", - " elif word_idx != previous_word_idx:\n", - " label_ids.append(label[word_idx])\n", - " # For the other tokens in a word, we set the label to either the current label or -100, depending on\n", - " # the label_all_tokens flag.\n", - " else:\n", - " label_ids.append(label[word_idx])\n", - " previous_word_idx = word_idx\n", - "\n", - " labels.append(label_ids)\n", - "\n", - " tokenized_inputs[\"labels\"] = labels\n", - " return tokenized_inputs\n", - "tokenized_datasets = tokenize_and_align_labels(short_dataset, label_list)\n", - "tokenized_val_datasets = tokenize_and_align_labels(val_dataset, val_label_list)\n", - "tokenized_test_datasets = tokenize_and_align_labels(test_dataset, test_label_list)\n", - "# print(tokenized_datasets)\n", - "# BERT's tokenizer returns the dataset in the form of a dictionary of lists (sentences). 
\n", - "# we have to convert it into a list of dictionaries for training.\n", - "def turn_dict_to_list_of_dict(d):\n", - " new_list = []\n", - "\n", - " for labels, inputs in zip(d[\"labels\"], d[\"input_ids\"]):\n", - " entry = {\"input_ids\": inputs, \"labels\": labels}\n", - " new_list.append(entry)\n", - "\n", - " return new_list\n", - "tokenised_train = turn_dict_to_list_of_dict(tokenized_datasets)\n", - "tokenised_val = turn_dict_to_list_of_dict(tokenized_val_datasets)\n", - "tokenised_test = turn_dict_to_list_of_dict(tokenized_test_datasets)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\Main\\anaconda3\\envs\\COM3029\\lib\\site-packages\\datasets\\load.py:756: FutureWarning: The repository for seqeval contains custom code which must be executed to correctly load the metric. You can inspect the repository content at https://raw.githubusercontent.com/huggingface/datasets/2.18.0/metrics/seqeval/seqeval.py\n", - "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n", - "Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.\n", - " warnings.warn(\n", - "c:\\Users\\Main\\anaconda3\\envs\\COM3029\\lib\\site-packages\\accelerate\\accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). 
Please pass an `accelerate.DataLoaderConfiguration` instead: \n", - "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "from transformers import DataCollatorForTokenClassification\n", - "data_collator = DataCollatorForTokenClassification(tokenizer)\n", - "import numpy as np\n", - "\n", - "metric = load_metric(\"seqeval\")\n", - "def compute_metrics(p):\n", - " predictions, labels = p\n", - " predictions = np.argmax(predictions, axis=2)\n", - "\n", - " # Remove ignored index (special tokens)\n", - " true_predictions = [\n", - " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", - " for prediction, label in zip(predictions, labels)\n", - " ]\n", - " true_labels = [\n", - " [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n", - " for prediction, label in zip(predictions, labels)\n", - " ]\n", - "\n", - " results = metric.compute(predictions=true_predictions, references=true_labels)\n", - " return {\n", - " \"precision\": results[\"overall_precision\"],\n", - " \"recall\": results[\"overall_recall\"],\n", - " \"f1\": results[\"overall_f1\"],\n", - " \"accuracy\": results[\"overall_accuracy\"],\n", - " }\n", - "from transformers import TrainingArguments, Trainer, EarlyStoppingCallback\n", - "\n", - "# Training arguments (feel free to play arround with these values)\n", - "model_name = \"bert-base-uncased\"\n", - "epochs = 5\n", - "batch_size = 4\n", - "learning_rate = 2e-5\n", - "\n", - "args = TrainingArguments(\n", - " f\"BERT-finetuned-NER\",\n", - " # evaluation_strategy = \"epoch\", ## Instead of focusing on loss and accuracy, we will focus on the F1 score\n", - " evaluation_strategy ='steps',\n", - " eval_steps = 7000,\n", - " save_total_limit = 3,\n", - " learning_rate=learning_rate,\n", - " per_device_train_batch_size=batch_size,\n", - " per_device_eval_batch_size=batch_size,\n", - " num_train_epochs=epochs,\n", - " 
weight_decay=0.001,\n", - " save_steps=35000,\n", - " metric_for_best_model = 'f1',\n", - " load_best_model_at_end=True,\n", - " logging_dir='logs',\n", - " logging_steps=500,\n", - ")\n", - "\n", - "trainer = Trainer(\n", - " model,\n", - " args,\n", - " train_dataset=tokenised_train,\n", - " eval_dataset=tokenised_val,\n", - " data_collator = data_collator,\n", - " tokenizer=tokenizer,\n", - " compute_metrics=compute_metrics,\n", - " callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "05efc63bbdbd41ab943752bef926cb0c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1340 [00:00<?, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'loss': 0.0071, 'learning_rate': 1.2537313432835823e-05, 'epoch': 1.87}\n", - "{'loss': 0.0036, 'learning_rate': 5.074626865671642e-06, 'epoch': 3.73}\n", - "{'train_runtime': 595.4785, 'train_samples_per_second': 9.001, 'train_steps_per_second': 2.25, 'train_loss': 0.0044074685715917335, 'epoch': 5.0}\n" - ] - }, - { - "data": { - "text/plain": [ - "TrainOutput(global_step=1340, training_loss=0.0044074685715917335, metrics={'train_runtime': 595.4785, 'train_samples_per_second': 9.001, 'train_steps_per_second': 2.25, 'train_loss': 0.0044074685715917335, 'epoch': 5.0})" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "trainer.train()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2e4ad56ce7ce4c82a6072a110a483010", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/39 [00:00<?, ?it/s]" - ] - }, - "metadata": {}, - 
"output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "{'0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 0, 1, 0, 0, 0, 0, 0, 0, 2, 3, 3, 1, 0, 1, 0, 0, 0, 0, 0, 0]': {'precision': 0.726027397260274,\n", - " 'recall': 0.7940074906367042,\n", - " 'f1': 0.7584973166368516,\n", - " 'number': 267},\n", - " '0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 1, 0, 0, 0, 0]': {'precision': 0.6468590831918506,\n", - " 'recall': 0.710820895522388,\n", - " 'f1': 0.6773333333333333,\n", - " 'number': 536},\n", - " '0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 0, 1, 0, 0, 0, 0, 2, 3, 3, 0, 1, 0, 0, 0, 0, 0]': {'precision': 0.6932515337423313,\n", - " 'recall': 0.7583892617449665,\n", - " 'f1': 0.7243589743589743,\n", - " 'number': 149},\n", - " '1, 0, 2, 3, 3, 0]': {'precision': 0.6644736842105263,\n", - " 'recall': 0.7829457364341085,\n", - " 'f1': 0.7188612099644129,\n", - " 'number': 129},\n", - " 'overall_precision': 0.6747491638795987,\n", - " 'overall_recall': 0.7465309898242368,\n", - " 'overall_f1': 0.7088274044795785,\n", - " 'overall_accuracy': 0.9245226281762141}" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Prepare the test data for evaluation in the same format as the training data\n", - "\n", - "predictions, labels, _ = trainer.predict(tokenised_test)\n", - "predictions = np.argmax(predictions, axis=2)\n", - "\n", - "# Remove the predictions for the [CLS] and [SEP] tokens \n", - "true_predictions = [\n", - " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", - " for prediction, label in zip(predictions, labels)\n", - "]\n", - "true_labels = [\n", - " [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n", - " for prediction, label in zip(predictions, labels)\n", - "]\n", - "\n", - "# Compute multiple metrics on the test restuls\n", - "results = metric.compute(predictions=true_predictions, references=true_labels)\n", - 
"results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "overall_precision\t0.674749164\t\t\t\t\n", - "overall_recall\t\t0.74653099\t\t\t\n", - "overall_f1\t\t\t0.708827404\t\t\n", - "overall_accuracy\t0.924522628\n" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "data = []\n", - "for sequence, metrics in results.items():\n", - " if 'overall' in sequence:\n", - " # Overall metrics don't have a sequence or number\n", - " data.append({\n", - " 'Sequence': sequence,\n", - " 'Precision': metrics if sequence == 'overall_precision' else '',\n", - " 'Recall': metrics if sequence == 'overall_recall' else '',\n", - " 'F1': metrics if sequence == 'overall_f1' else '',\n", - " 'Number': '',\n", - " 'Accuracy': metrics if sequence == 'overall_accuracy' else ''\n", - " })\n", - " else:\n", - " data.append({\n", - " 'Sequence': sequence,\n", - " 'Precision': metrics['precision'],\n", - " 'Recall': metrics['recall'],\n", - " 'F1': metrics['f1'],\n", - " 'Number': metrics['number'],\n", - " 'Accuracy': '' \n", - " })\n", - "\n", - "# Create a pandas DataFrame\n", - "df = pd.DataFrame(data)\n", - "\n", - "# Save the DataFrame to a CSV file\n", - "df.to_csv('BERT/results.csv', index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "COM3029", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/predictions_with_accuracy.csv b/predictions_with_accuracy.csv deleted file mode 100644 index d65d5f6..0000000 --- a/predictions_with_accuracy.csv +++ /dev/null @@ -1,33 +0,0 @@ -Text,True Labels,Predicted Labels,Accuracy,F1 Score -"['Abbreviations', ':', '<unk>', ',', 
'Global', '<unk>', '<unk>', 'Study', ';', 'VIP', ',', '<unk>', 'improved', '<unk>', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",1.0,0.9541633921844966 -"['<unk>', 'from', 'FPLC', 'purification', 'were', 'treated', 'with', '<unk>', 'buffer', '[', '<unk>', ']', 'with', '10', 'mM', '<unk>', '-', 'dithiothreitol', '(', '<unk>', ')', 'and', '<unk>', 'for', '5', 'm', 'at', '<unk>', '°', 'C', 'then', 'analyzed', 'on', 'a', '4', '%', 'to', '15', '%', '<unk>', 'SDS', 'gel', 'with', 'a', '6', '%', '<unk>', 'gel', '<unk>', 'at', 'ambient', 'temperature', 'at', 'a', '<unk>', '100', '<unk>', 'Two', 'epithelial', 'cytokines', 'other', 'than', '<unk>', ',', '<unk>', ',', 'and', '<unk>', 'stromal', '<unk>', '(', '<unk>', ')', 'are', 'known', 'to', 'activate', '<unk>', 'in', 'the', 'lung', '[', '<unk>', ']', '.']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 
'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O']",0.9176470588235294,0.9541633921844966 -"['We', 'developed', 'a', 'variant', 'of', 'gene', 'set', 'enrichment', 'analysis', '(', '<unk>', ')', 'to', 'determine', 'whether', 'a', 'genetic', 'pathway', 'shows', 'evidence', 'for', 'age', 'regulation', '[', '23', ']', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9411764705882353,0.9541633921844966 -"['Red', 'represents', 'samples', 'having', 'the', 'normalized', '<unk>', 'and/or', '<unk>', 'values', 'in', 'cancer', 'tissues', '<unk>', '1.1', 'folds', 'of', 'normal', 'tissues', '(', 'of', 'which', 'enhanced', '<unk>', 'and/or', '<unk>', 'level', 'may', 'be', '<unk>', 'of', 'dominant', 'survival', 'mode', 'of', '<unk>', 'signaling', ')', ';', 'blue', 'represents', 'samples', 'having', 'both', 'normalized', '<unk>', 'and', '<unk>', 'values', '<', '1.1', '(', 'of', 'which', '<unk>', 'and', '<unk>', 'levels', 'less', 'than', 'or', 'equal', 'to', 'normal', 'may', 'be', '<unk>', 'of', 'the', 'apoptosis', '-', '<unk>', 'mode', 'of', '<unk>', 'signaling', ')', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-AC', 'B-O', 'B-LF', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 
'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.8823529411764706,0.9541633921844966 -"['(', 'D', ')', 'Lysates', 'of', '<unk>', 'cancer', 'tissues', 'before', 'and', 'after', '<unk>', '(', 'RT', ')', 'from', 'each', 'of', 'the', '<unk>', 'patients', 'were', 'subjected', 'to', 'SDS', '-', 'PAGE', 'and', 'immunoblotting', 'with', 'antibodies', 'against', '<unk>', ',', '<unk>', ',', 'and', 'C', '-', 'terminal', '(', 'C', '-', '<unk>', ')', 'of', '<unk>', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 
'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9647058823529412,0.9541633921844966 -"['<unk>', 'risks', '(', 'RRs', ')', 'with', '95', '%', 'confidence', 'intervals', '(', 'CIs', ')', 'are', 'reported', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-AC', 'B-O', 
'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9764705882352941,0.9541633921844966 -"['Increasing', 'concentrations', 'of', 'the', 'p53', '<unk>', 'significantly', 'decrease', 'the', 'levels', 'of', '<unk>', 'but', 'not', 'the', 'endogenous', '<unk>', 'proteins', 'in', 'western', '<unk>', 'of', 'both', '<unk>', 'and', 'pellet', 'fractions', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9647058823529412,0.9541633921844966 -"['(', 'B', ')', 'A', 'decrease', 'in', 'p53', 'as', 'the', 'result', 'of', 'shRNA', 'knockdown', 'increases', 'the', 'levels', 'of', '<unk>', 'but', 'not', 'WT', 'SOD1', 'proteins', 'in', 'the', '<unk>', 'aggregation', 'assay', ',', 'as', 'shown', 'by', 'western', '<unk>', 'of', 'both', '<unk>', '(', 'S', ')', '(', 'n', '=', '2', ')', 'and', 'pellet', '(', 'P', ')', '(', 'n', '=', '3', ')', 'fractions', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 
'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9529411764705882,0.9541633921844966 -"['(', 'C', ')', 'A', 'complete', 'absence', 'of', 'p53', 'increases', 'the', 'accumulation', 'of', '<unk>', 'mutant', 'proteins', 'in', '<unk>', '–', 'HCT116', 'cells', 'when', 'compared', 'with', 'controls', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",1.0,0.9541633921844966 -"['<unk>', 'sodium', '(', '<unk>', ',', 'Fig', '2.9', ')', 'is', 'the', 'only', 'included', 'NP', 'of', '<unk>', 'origin', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-LF', 
'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9529411764705882,0.9541633921844966 -"['I', 'have', 'read', 'the', 'journal', ""'s"", 'policy', 'and', 'the', 'authors', 'of', 'this', 'manuscript', 'have', 'the', 'following', '<unk>', '<unk>', ':', 'DF', ',', '<unk>', ',', 'and', '<unk>', 'are', 'staff', 'members', 'of', 'the', 'World', 'Health', 'Organization', '(', 'WHO', ')', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9882352941176471,0.9541633921844966 -"['Characteristics', 'were', 'compared', 'across', 'the', 'matched', 'groups', 'using', 'standardized', 'mean', 'differences', '(', '<unk>', ')', ',', 'with', 'an', 'SMD', '>', '0.1', 'indicating', 'a', 'clinically', 'important', 'difference', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['I-LF', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9764705882352941,0.9541633921844966 -"['<unk>', 'that', 'are', 'rich', 'in', '<unk>', 'were', 
'selected', 'to', 'be', 'included', 'this', 'study', ':', 'protein', ',', '<unk>', 'acid', '(', '<unk>', ')', 'and', '<unk>', 'acid', '(', '<unk>', ')', ',', '<unk>', '(', 'A', ',', 'D', ',', '<unk>', ',', 'and', '<unk>', ')', ',', 'and', '<unk>', '(', 'zinc', 'and', '<unk>', ',', 'and', 'iron', ')', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9529411764705882,0.9541633921844966 
-"['To', 'test', 'if', 'this', 'polymorphism', '<unk>', 'CD24', 'mRNA', 'stability', ',', 'we', 'constructed', 'two', '<unk>', '(', '<unk>', '<unk>', '-', '<unk>', 'and', '<unk>', '<unk>', '-', '<unk>', ';', 'Figure', '5', ',', 'top', 'panel', ')', 'and', 'transfected', 'Chinese', 'hamster', '<unk>', '(', '<unk>', ')', 'cells', 'with', 'the', 'two', '<unk>', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9764705882352941,0.9541633921844966 -"['Blood', 'pressure', '(', 'BP', ')', 'will', 'be', 'controlled', 'after', 'each', 'visit', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",1.0,0.9541633921844966 -"['<unk>', ',', '<unk>', '<unk>', 'is', 'calculated', 'using', '<unk>', 'age', 'and', '9', '<unk>', '(', '<unk>', ',', 'creatinine', ',', 'glucose', ',', '[', 'log', ']', 'C', '-', 'reactive', 'protein', '[', 'CRP', ']', ',', 'lymphocyte', 'percent', ',', 'mean', 'cell', 'volume', ',', 'red', 'blood', 'cell', 'distribution', '<unk>', ',', 'alkaline', 'phosphatase', ',', 'and', 'white', 'blood', 'cell', 'count', ')', 'that', 'were', 'selected', 'using', 'a', 'Cox', 'proportional', 'hazard', '<unk>', 'net', 'model', 'for', 'mortality', 'based', 'on', '10', '-', 'fold', 'cross', '-', 'validation', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-O', 'B-O', 'B-LF', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-O', 'I-LF', 
'B-O', 'B-O', 'B-O', 'I-LF', 'I-LF', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'I-LF', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.8470588235294118,0.9541633921844966 -"['However', ',', 'c', '-', 'Fos', ',', '<unk>', ',', 'and', 'PIP3', 'formation', 'were', 'not', 'elevated', 'in', 'response', 'to', '<unk>', 'in', 'obese', '<unk>', '-', 'deficient', 'mice', '6', 'wk', '<unk>', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 
'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9764705882352941,0.9541633921844966 -"['(', 'Figures', '<unk>', ',', 'B', 'and', '<unk>', ')', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",1.0,0.9541633921844966 -"['No', 'significant', 'changes', 'were', 'found', 'in', '<unk>', 'hypothalamus', '(', '<unk>', ')', 'baseline', 'mRNA', 'expression', 'of', '<unk>', 'and', '<unk>', 'and', '<unk>', '-', 'regulated', 'transcript', '(', '<unk>', ')', 'mRNA', '6', 'wk', '<unk>', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9411764705882353,0.9541633921844966 -"['(', 'Figure', '<unk>', ')', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",1.0,0.9541633921844966 -"['It', 'has', 'been', 'found', 'that', 'control', 'has', 'been', 'centered', 'around', 'cyclin', '-', 'dependent', 'protein', 'kinases', '(', '<unk>', ')', ',', 'which', '<unk>', 'the', 'major', 'events', 'of', 'the', '<unk>', 'cell', 'cycle', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9529411764705882,0.9541633921844966 -"['The', '<unk>', 'hydroxylase', ',', '<unk>', 'factor', '<unk>', '<unk>', '<unk>', '(', '<unk>', ')', '(', '<unk>', 'accession', 'number', ':', '<unk>', ')', ',', '<unk>', 'an', '<unk>', 'residue', 'within', '<unk>', 'subunits', '(', '<unk>', 'on', '<unk>', ')', ',', 'resulting', 'in', '<unk>', 'inhibition', 'of', 'its', 'interaction', 'with', 'the', 'transcriptional', 'co', 
'-', 'activator', '<unk>', '/', '<unk>', ',', 'thereby', '<unk>', '<unk>', '-', 'dependent', 'transcription', '[', '2', ']', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-AC', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9529411764705882,0.9541633921844966 -"['The', 'manuscript', 'by', '<unk>', 'et', 'al', '<unk>', 'the', 'relationships', 'between', 'bile', 'acid', '(', '<unk>', ')', 'levels', '/', 'synthesis', 'and', 'dementia', 'related', 'pathology', ',', 'such', 'as', 'white', 'matter', 'lesions', '(', '<unk>', ')', 'and', '<unk>', 'deposition', 
',', 'as', 'well', 'as', 'vascular', 'dementia', 'risk', 'and', 'sex', 'related', 'differences', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9529411764705882,0.9541633921844966 -"['However', ',', 'current', 'literature', 'on', 'the', 'stability', 'of', 'the', '<unk>', 'in', '<unk>', 'is', 'very', 'limited', ',', '<unk>', 
'exclusively', 'with', '<unk>', '[', '<unk>', ']', 'or', '<unk>', '(', '<unk>', ')', '[', '17', ']', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>']",0.9529411764705882,0.9541633921844966 -"['CD', 'is', 'the', 'most', 'common', 'form', 'of', 'inflammatory', '<unk>', 'disease', ',', 'the', 'other', 'being', '<unk>', '<unk>', '(', '<unk>', ')', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9647058823529412,0.9541633921844966 -"['We', '<unk>', 'a', 'cohort', 'of', '<unk>', '<unk>', '<unk>', 'patients', '(', '<unk>', ')', 'for', '<unk>', '(', '<unk>', ')', ',', '<unk>', '(', '<unk>', ')', ',', 'and', 'the', 'novel', '<unk>', 'locus', '(', '<unk>', ')', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9764705882352941,0.9541633921844966 -"['<unk>', 'after', '<unk>', 'surgery', 'is', 'an', '<unk>', 'but', 'a', '<unk>', '<unk>', ']', 'The', 'reported', 'incidence', 'of', '<unk>', 'after', '<unk>', 'surgery', '<unk>', 'from', '0.05', '%', 'to', '<unk>', '%', 'with', '20', '-', 'G', '<unk>', ']', '<unk>', 'reports', 'showed', 'higher', 'rates', 'of', 'post', '-', '<unk>', 'invasive', '<unk>', 'surgery', '(', '<unk>', ')', '<unk>', ',', 'whereas', 'recent', 'reports', 'have', 'shown', 'a', '<unk>', '<unk>', ']', 'Body', 'mass', 'index', '(', 'BMI', ')', 'was', 'calculated', 'as', 'the', 'weight', 'in', '<unk>', 'divided', 'by', 'the', 'square', 'of', 'the', 'height', 'in', '<unk>', '.', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 
'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>']",0.8588235294117647,0.9541633921844966 -"['<unk>', 'data', 'are', 'available', 'from', '<unk>', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",1.0,0.9541633921844966 -"['#', 'Cells', ',', 'number', 'of', 'cells', ';', '<unk>', 'CA', ',', 'average', 'cell', 'area', ';', '<unk>', ',', 'a', '<unk>', 'factor', ';', '<unk>', 'A', ',', 'leaf', 'area', ';', '<unk>', ',', '<unk>', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'I-LF', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-LF', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9411764705882353,0.9541633921844966 -"['The', 'best', 'linear', '<unk>', 'estimates', '(', '<unk>', ')', 'of', 'the', '<unk>', '<unk>', 'families', 'were', 'estimated', 'following', 'the', 'model', ':', '<unk>', '=', '<unk>', ',', 'where', '<unk>', 'is', 'the', 'phenotype', 'of', 'the', 'ith', '(', 'i', '=', '1,2', '…', ',', '<unk>', ')', 'genotype', 'in', 'the', 'jth', '(', '<unk>', '=', '<unk>', ')', '<unk>', ',', 'the', '<unk>', '(', 'm', '=', '1,2', ')', 'replicate', 'effect', 'was', '<unk>', 'in', 'each', '<unk>', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-LF', 'I-LF', 'I-LF', 'I-LF', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 
'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9529411764705882,0.9541633921844966 -"['<unk>', 'is', 'the', 'overall', 'mean', ',', '<unk>', 'is', 'the', 'genotype', 'effect', ',', '<unk>', 'is', 'the', '<unk>', 'effect', ',', '<unk>', 'is', 'the', '<unk>', 'effect', ',', '<unk>', 'is', 'the', 'replicate', 'effect', ',', 'and', '<unk>', '<unk>', 'N', '(', '0', ',', '<unk>', ')', 'is', 'the', 'error', 'term', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-AC', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-O', 'B-LF', 'B-O', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",0.9764705882352941,0.9541633921844966 -"['KO', ',', 'knockout', ';', '<unk>', ',', 'postsynaptic', 'density', '.', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-AC', 'B-O', 'B-LF', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', 
'<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']","['B-AC', 'B-O', 'B-LF', 'B-O', 'B-AC', 'B-O', 'B-LF', 'I-LF', 'B-O', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']",1.0,0.9541633921844966 -- GitLab