| from transformers import BartTokenizer | |
def main():
    """Print the special tokens of the ``facebook/bart-base`` tokenizer.

    One token is printed per line, in this order: bos, cls, eos, sep,
    mask, pad, unk.
    """
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
    special_token_attrs = (
        "bos_token",
        "cls_token",
        "eos_token",
        "sep_token",
        "mask_token",
        "pad_token",
        "unk_token",
    )
    for attr_name in special_token_attrs:
        print(getattr(tokenizer, attr_name))
    # Output noted by the original author:
    #   <s>
    #   <s>
    #   </s>
    #   </s>
    #   <mask>
    #   <pad>
    #   <unk>
    #
    # Right, so this is just like the symbols for BERT, but in lowercase:
    #   bos = cls
    #   sep = eos
    # Would it be okay to use <idiom> = <sep>?
    # No: sep implies that a sentence somehow ends.
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()