| from nltk.tokenize import sent_tokenize | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import torch | |
| import src.exception.Exception.Exception as ExceptionCustom | |
# Method tag passed to ExceptionCustom.checkForException by this module.
METHOD = "TRANSLATE"

# Use the GPU when torch reports one as available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer/model pair loaded from the "opus-mt-ro-en" checkpoint; the model
# is placed on the selected device as soon as it is loaded.
tokenizerROMENG = AutoTokenizer.from_pretrained("BlackKakapo/opus-mt-ro-en")
modelROMENG = AutoModelForSeq2SeqLM.from_pretrained(
    "BlackKakapo/opus-mt-ro-en"
).to(device)

# Tokenizer/model pair loaded from the "opus-mt-en-ro" checkpoint (the
# opposite direction), placed on the same device.
tokenizerENGROM = AutoTokenizer.from_pretrained("BlackKakapo/opus-mt-en-ro")
modelENGROM = AutoModelForSeq2SeqLM.from_pretrained(
    "BlackKakapo/opus-mt-en-ro"
).to(device)
def _sample_translation(text, tokenizer, model):
    """Run *text* through one tokenizer/model pair with sampled decoding.

    Args:
        text: The string to translate.
        tokenizer: Tokenizer matching ``model``; used to encode the input and
            decode the generated ids.
        model: Seq2seq model already placed on the module-level ``device``.

    Returns:
        The first decoded sequence produced by ``model.generate``, with
        special tokens removed.
    """
    # truncation=True caps the input at the tokenizer's configured maximum
    # length, so an unusually long "sentence" is cut instead of being fed to
    # the model beyond its supported input size.
    encoded = tokenizer(text, return_tensors="pt", truncation=True).to(device)
    generated = model.generate(
        input_ids=encoded.input_ids,
        # Pass the mask the tokenizer produced instead of leaving generate()
        # to infer one from the input ids.
        attention_mask=encoded.attention_mask,
        do_sample=True,
        max_length=256,
        top_k=90,
        top_p=0.97,
        early_stopping=False,
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]


def paraphraseTranslateMethod(requestValue : str):
    """Paraphrase text by translating each sentence there and back.

    The request is first validated with
    ``ExceptionCustom.checkForException(requestValue, METHOD)``. If validation
    passes, the text is split into sentences with ``sent_tokenize`` and each
    sentence is passed through the ROMENG model and then the ENGROM model
    (the ``opus-mt-ro-en`` and ``opus-mt-en-ro`` checkpoints). Decoding is
    sampled (``do_sample=True``), so repeated calls with the same input may
    return different text.

    Args:
        requestValue: The text to paraphrase.

    Returns:
        A ``(result, exception)`` tuple of strings. When the validator
        returns a non-empty message, the tuple is ``("", message)``.
        Otherwise it is ``(text, "")``, where ``text`` is the round-tripped
        sentences in order, each followed by a single space (so a non-empty
        result ends with a trailing space).
    """
    exception = ExceptionCustom.checkForException(requestValue, METHOD)
    if exception != "":
        return "", exception

    round_tripped = []
    for sentence in sent_tokenize(requestValue):
        intermediate = _sample_translation(sentence, tokenizerROMENG, modelROMENG)
        round_tripped.append(
            _sample_translation(intermediate, tokenizerENGROM, modelENGROM)
        )

    # Keep the established output format: every sentence, including the last
    # one, is followed by exactly one space.
    return "".join(f"{sentence} " for sentence in round_tripped), ""