Spaces:
Runtime error
Runtime error
| import os | |
| import sys | |
| import glob | |
| from tqdm import tqdm | |
| from google.cloud import translate | |
# Point GOOGLE_APPLICATION_CREDENTIALS at "api_key.json", a JSON file with the
# API credentials that is expected to sit in the same directory as this script.
_script_dir = os.path.dirname(__file__)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.join(_script_dir, "api_key.json")
# Maps FLORES-style language codes (language_Script) to the language codes used
# when talking to the translation service. The "mni_Mtei" value is kept with an
# underscore here; translate_text rewrites it to "mni-Mtei" before the request.
flores_to_iso = dict(
    asm_Beng="as",
    ben_Beng="bn",
    doi_Deva="doi",
    eng_Latn="en",
    gom_Deva="gom",
    guj_Gujr="gu",
    hin_Deva="hi",
    kan_Knda="kn",
    mai_Deva="mai",
    mal_Mlym="ml",
    mar_Deva="mr",
    mni_Mtei="mni_Mtei",
    npi_Deva="ne",
    ory_Orya="or",
    pan_Guru="pa",
    san_Deva="sa",
    sat_Olck="sat",
    snd_Arab="sd",
    tam_Taml="ta",
    tel_Telu="te",
    urd_Arab="ur",
)
# Copy the project id from the JSON file containing the API credentials and use
# it as the `project_id` argument of translate_text (the default is a placeholder).
# Lazily created client shared by every translate_text call in this process.
_translation_client = None


def _get_translation_client():
    """Return the shared TranslationServiceClient, creating it on first use."""
    global _translation_client
    if _translation_client is None:
        _translation_client = translate.TranslationServiceClient()
    return _translation_client


def translate_text(text: str, src_lang: str, tgt_lang: str, project_id: str = "project_id") -> str:
    """Translate ``text`` from ``src_lang`` to ``tgt_lang`` via Google Cloud Translation.

    Args:
        text: The string to translate; it is sent as ``text/plain``.
        src_lang: Source language code; must be a key of ``flores_to_iso``.
        tgt_lang: Target language code; must be a key of ``flores_to_iso``.
        project_id: Google Cloud project id used to build the request parent.
            The default ``"project_id"`` is a placeholder.

    Returns:
        The ``translated_text`` of every translation in the response,
        concatenated in response order.

    Raises:
        KeyError: If ``src_lang`` or ``tgt_lang`` is not in ``flores_to_iso``.
    """
    src_code = flores_to_iso[src_lang]
    tgt_code = flores_to_iso[tgt_lang]

    # flores_to_iso spells this code with an underscore; the request is sent
    # with the hyphenated form instead.
    if src_code == "mni_Mtei":
        src_code = "mni-Mtei"
    if tgt_code == "mni_Mtei":
        tgt_code = "mni-Mtei"

    # Reuse one client instead of constructing a new one for every sentence.
    client = _get_translation_client()

    location = "global"
    parent = f"projects/{project_id}/locations/{location}"

    response = client.translate_text(
        request={
            "parent": parent,
            "contents": [text],
            "mime_type": "text/plain",  # mime types: text/plain, text/html
            "source_language_code": src_code,
            "target_language_code": tgt_code,
        }
    )

    return "".join(translation.translated_text for translation in response.translations)
# Codes (values of flores_to_iso) for which translations are produced. Stored
# as a set so the membership test is an exact match, not a substring search.
_TRANSLATABLE_CODES = frozenset(
    "as bn doi gom gu hi kn mai ml mni_Mtei mr ne or pa sa sd ta te ur".split()
)


def _translate_file(in_fname, out_fname, src_lang, tgt_lang):
    """Translate ``in_fname`` line by line into ``out_fname``.

    Nothing happens when the input file is missing or the output file already
    exists, so an interrupted run can be resumed without redoing finished files.
    """
    if not os.path.exists(in_fname) or os.path.exists(out_fname):
        return

    # Explicit UTF-8: the platform default encoding may not handle this text.
    with open(in_fname, "r", encoding="utf-8") as f:
        # Empty lines (including the one after a trailing newline) are dropped.
        sents = [line.strip() for line in f.read().split("\n") if line]

    translations = [
        translate_text(text, src_lang, tgt_lang).strip() for text in tqdm(sents)
    ]

    # Written only after every sentence has been translated.
    with open(out_fname, "w", encoding="utf-8") as f:
        f.write("\n".join(translations))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <root_dir>")
    root_dir = sys.argv[1]

    # Every entry of root_dir is expected to be named "<src_lang>-<tgt_lang>".
    for pair in sorted(glob.glob(os.path.join(root_dir, "*"))):
        print(pair)

        langs = os.path.basename(pair).split("-")
        if len(langs) != 2:
            # Not a "<src>-<tgt>" entry; skip it instead of crashing on unpack.
            continue
        src_lang, tgt_lang = langs

        if src_lang not in flores_to_iso or tgt_lang not in flores_to_iso:
            continue

        # The whitelist is checked against the target when the source is
        # English, and against the source otherwise.
        other_lang = tgt_lang if src_lang == "eng_Latn" else src_lang
        if flores_to_iso[other_lang] not in _TRANSLATABLE_CODES:
            continue

        print(f"{src_lang} - {tgt_lang}")

        # source to target translations
        _translate_file(
            os.path.join(pair, f"test.{src_lang}"),
            os.path.join(pair, f"test.{tgt_lang}.pred.google"),
            src_lang,
            tgt_lang,
        )

        # target to source translations
        _translate_file(
            os.path.join(pair, f"test.{tgt_lang}"),
            os.path.join(pair, f"test.{src_lang}.pred.google"),
            tgt_lang,
            src_lang,
        )