From ae6201c2d816767a0c758e0c24bb86af17c4dce7 Mon Sep 17 00:00:00 2001
From: "Menezes, Luke J (PG/T - Comp Sci & Elec Eng)" <lm01906@surrey.ac.uk>
Date: Thu, 23 May 2024 11:21:19 +0000
Subject: [PATCH] Upload New File

---
Review note: "model_max_length" is set to 512 rather than the "unset"
sentinel (int(1e30)) that the tokenizer export writes by default. The
tokenizer class is BertTokenizer and the special-token ids match the
SciBERT vocabulary, whose encoder has 512 position embeddings; with the
sentinel in place, truncation=True has no usable bound and longer inputs
fail inside the model. Confirm against max_position_embeddings in the
accompanying config.json.

 SciBERT-finetuned-NER/tokenizer_config.json | 57 +++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 SciBERT-finetuned-NER/tokenizer_config.json

diff --git a/SciBERT-finetuned-NER/tokenizer_config.json b/SciBERT-finetuned-NER/tokenizer_config.json
new file mode 100644
index 0000000..feeb7cb
--- /dev/null
+++ b/SciBERT-finetuned-NER/tokenizer_config.json
@@ -0,0 +1,57 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "104": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
-- 
GitLab