From 8a81c4e2cfdd47cdf9b52141ee050093a44ee3a3 Mon Sep 17 00:00:00 2001 From: "Hafiz, Aqib (PG/T - Comp Sci & Elec Eng)" <ah02821@surrey.ac.uk> Date: Thu, 23 May 2024 16:05:45 +0000 Subject: [PATCH] Upload New File --- tokenizer_config.json | 55 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tokenizer_config.json diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..fab1cec --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "102": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "103": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "do_lower_case": true, + "mask_token": "[MASK]", + "model_max_length": 512, + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "DistilBertTokenizer", + "unk_token": "[UNK]" +} -- GitLab