diff --git a/SciBERT-finetuned-NER/tokenizer_config.json b/SciBERT-finetuned-NER/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..feeb7cb579e0ce178aa1abf656d6fcc51a690e90
--- /dev/null
+++ b/SciBERT-finetuned-NER/tokenizer_config.json
@@ -0,0 +1,57 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "104": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
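
For context, a minimal sketch of how this config would typically be consumed, assuming the SciBERT-finetuned-NER/ directory also contains the tokenizer's vocab.txt (not shown in this diff) and the Hugging Face `transformers` library is available:

```python
from transformers import AutoTokenizer

# "tokenizer_class": "BertTokenizer" routes AutoTokenizer to a BERT WordPiece
# tokenizer; the special-token IDs ([PAD]=0, [UNK]=101, [CLS]=102, [SEP]=103,
# [MASK]=104) are read from added_tokens_decoder and match the SciBERT scivocab.
tokenizer = AutoTokenizer.from_pretrained("SciBERT-finetuned-NER")

# do_lower_case=true, so text is lowercased before WordPiece splitting.
# Example sentence is illustrative only.
encoding = tokenizer("EGFR mutations in lung adenocarcinoma")
print(encoding["input_ids"])
print(tokenizer.convert_ids_to_tokens(encoding["input_ids"]))
```

Note that "model_max_length" is the library's sentinel for "unset" (no length limit was saved), so callers would need to pass `max_length` and `truncation=True` explicitly when encoding long inputs for the NER model.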