from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
from mosestokenizer import *
from indicnlp.tokenize import sentence_tokenize
from docx import Document
import docx
import os
import re
import time
import json
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import (
    AutoConfig,
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    squad_convert_examples_to_features
)
from transformers.data.processors.squad import SquadResult, SquadV2Processor, SquadExample
from transformers.data.metrics.squad_metrics import compute_predictions_logits

# Fetch the CUAD dataset and the fine-tuned RoBERTa-base contract QA checkpoint.
os.system('git clone https://github.com/TheAtticusProject/cuad.git')
os.system('mv cuad cuad-training')
os.system('unzip cuad-training/data.zip -d cuad-data/')
os.system('mkdir cuad-models')
os.system('curl https://zenodo.org/record/4599830/files/roberta-base.zip?download=1 --output cuad-models/roberta-base.zip')
os.system('unzip cuad-models/roberta-base.zip -d cuad-models/')
trans_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
trans_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
trans_model = trans_model.to(device)

lang_dict = {
    'english': 'eng_Latn',
    'assamese': 'asm_Beng',
    'awadhi': 'awa_Deva',
    'bengali': 'ben_Beng',
    'bhojpuri': 'bho_Deva',
    'gujarati': 'guj_Gujr',
    'hindi': 'hin_Deva',
    'kannada': 'kan_Knda',
    'kashmiri': 'kas_Deva',
    'maithili': 'mai_Deva',
    'malayalam': 'mal_Mlym',
    'marathi': 'mar_Deva',
    'odia': 'ory_Orya',
    'punjabi': 'pan_Guru',
    'sanskrit': 'san_Deva',
    'sindhi': 'snd_Arab',
    'tamil': 'tam_Taml',
    'telugu': 'tel_Telu',
    'urdu': 'urd_Arab'
}

def translate_sentence(article, target):
    inputs = trans_tokenizer(article.replace("\"", ""), return_tensors="pt").to(device)
    translated_tokens = trans_model.generate(
        **inputs, forced_bos_token_id=trans_tokenizer.lang_code_to_id[lang_dict[target]], max_length=100)
    return trans_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
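
# Minimal usage sketch (illustrative only, never called by the app): translate a single
# English sentence into Hindi with the NLLB helper above. The sample string is hypothetical.
def _example_translate_sentence():
    sample = "This Agreement shall commence on the Effective Date."
    print(translate_sentence(sample, "hindi"))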
# Map language names to the ISO codes expected by the Indic NLP sentence splitter.
INDIC_DICT = {'assamese': 'as', 'bengali': 'bn', 'gujarati': 'gu',
              'hindi': 'hi',
              'kannada': 'kn',
              'malayalam': 'ml',
              'marathi': 'mr',
              'odia': 'or',
              'punjabi': 'pa',
              'tamil': 'ta',
              'telugu': 'te'}

def split_sentences(paragraph, language):
    if language in INDIC_DICT.keys():
        return sentence_tokenize.sentence_split(paragraph, lang=INDIC_DICT[language])
    elif language == 'english':
        with MosesSentenceSplitter('en') as splitter:
            return splitter([paragraph])
    else:
        return paragraph.split(".")

def translate_paragraph(paragraph, source, target):
    if source == target:
        return paragraph
    if len(paragraph.split()) < 100:
        return translate_sentence(paragraph, target)
    else:
        # Long paragraphs are split into sentences and translated one at a time.
        sentences = split_sentences(paragraph, source)
        outputs = []
        for each_sentence in sentences:
            outputs.append(translate_sentence(each_sentence, target))
        return " ".join(outputs)
def docx_replace(doc, data):
    paragraphs = list(doc.paragraphs)
    for t in doc.tables:
        for row in t.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    paragraphs.append(paragraph)
    for each in data:
        key = list(each.keys())[0]
        val = list(each.values())[0]
        for p in paragraphs:
            # key_name = '${{{}}}'.format(key)  # placeholders in the form ${PlaceholderName}
            key_name = key
            if key_name in p.text:
                # print(f'old one {p.text}')
                inline = p.runs
                # Replace strings and retain the same style.
                # The text to be replaced can be split over several runs so
                # search through, identify which runs need to have text replaced
                # then replace the text in those identified
                started = False
                key_index = 0
                # found_runs is a list of (inline index, index of match, length of match)
                found_runs = list()
                found_all = False
                replace_done = False
                for i in range(len(inline)):
                    # case 1: found in single run so short circuit the replace
                    if key_name in inline[i].text and not started:
                        found_runs.append((i, inline[i].text.find(key_name), len(key_name)))
                        text = inline[i].text.replace(key_name, str(val))
                        inline[i].text = text
                        replace_done = True
                        found_all = True
                        break
                    if key_name[key_index] not in inline[i].text and not started:
                        # keep looking ...
                        continue
                    # case 2: search for partial text, find first run
                    if key_name[key_index] in inline[i].text and inline[i].text[-1] in key_name and not started:
                        # check sequence
                        start_index = inline[i].text.find(key_name[key_index])
                        check_length = len(inline[i].text)
                        for text_index in range(start_index, check_length):
                            if inline[i].text[text_index] != key_name[key_index]:
                                # no match so must be false positive
                                break
                        if key_index == 0:
                            started = True
                        chars_found = check_length - start_index
                        key_index += chars_found
                        found_runs.append((i, start_index, chars_found))
                        if key_index != len(key_name):
                            continue
                        else:
                            # found all chars in key_name
                            found_all = True
                            break
                    # case 2: search for partial text, find subsequent run
                    if key_name[key_index] in inline[i].text and started and not found_all:
                        # check sequence
                        chars_found = 0
                        check_length = len(inline[i].text)
                        for text_index in range(0, check_length):
                            if inline[i].text[text_index] == key_name[key_index]:
                                key_index += 1
                                chars_found += 1
                            else:
                                break
                        # no match so must be end
                        found_runs.append((i, 0, chars_found))
                        if key_index == len(key_name):
                            found_all = True
                            break
                if found_all and not replace_done:
                    for i, item in enumerate(found_runs):
                        index, start, length = [t for t in item]
                        if i == 0:
                            text = inline[index].text.replace(inline[index].text[start:start + length], str(val))
                            inline[index].text = text
                        else:
                            text = inline[index].text.replace(inline[index].text[start:start + length], '')
                            inline[index].text = text
                    # print(p.text)
                break
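
# Minimal sketch of how docx_replace is driven (illustrative only; file names are hypothetical):
# each entry maps the exact text of a paragraph to its replacement, and run-level formatting
# of the original paragraph is preserved.
def _example_docx_replace():
    doc = Document("sample_contract.docx")
    docx_replace(doc, [{"Original paragraph text": "Replacement text"}])
    doc.save("sample_contract_modified.docx")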
input_output_trans = {}

def translate_fill(document_name, output_file, src, trg):
    print("translate doc")
    doc = docx.Document(document_name)
    # Return a cached result if this document has already been translated into the target language.
    if doc.paragraphs[0].text in list(input_output_trans.keys()):
        lang_doc_dict = input_output_trans[doc.paragraphs[0].text]
        if trg in lang_doc_dict.keys():
            time.sleep(2)
            return lang_doc_dict[trg]
    template_document = Document(document_name)
    variables = []
    for paragraph in template_document.paragraphs:
        if(paragraph.text.strip() != ""):
            variables.append({paragraph.text: translate_paragraph(paragraph.text, src, trg)})
    for t in template_document.tables:
        for row in t.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    if(paragraph.text.strip() != ""):
                        variables.append({paragraph.text: translate_paragraph(paragraph.text, src, trg)})
    docx_replace(template_document, variables)
    template_document.save(output_file)
    return output_file

def translate_txt(document_name, output_file, src, trg):
    print("translate text")
    with open(document_name) as fp:
        lines = fp.readlines()
    lines = [line.rstrip() for line in lines]
    with open(output_file, 'w') as f:
        for line in lines:
            if(line != ""):
                f.write(translate_paragraph(line, src, trg) + "\n")
            else:
                f.write("\n")
    return output_file
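
# Usage sketch (hypothetical file names, not executed by the app): translate a .docx contract
# and a plain-text report from English into Hindi with the helpers above.
def _example_translate_files():
    translate_fill("contract_en.docx", "contract_hi.docx", "english", "hindi")
    translate_txt("clauses_en.txt", "clauses_hi.txt", "english", "hindi")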
info_model_path = 'cuad-models/roberta-base/'
info_config_class, info_model_class, info_tokenizer_class = (
    AutoConfig, AutoModelForQuestionAnswering, AutoTokenizer)
info_config = info_config_class.from_pretrained(info_model_path)
info_tokenizer = info_tokenizer_class.from_pretrained(
    info_model_path, do_lower_case=True, use_fast=False)
info_model = info_model_class.from_pretrained(info_model_path, config=info_config)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
info_model.to(device)

def run_prediction(question_texts, context_text):
    ### Setting hyperparameters
    max_seq_length = 512
    doc_stride = 256
    n_best_size = 1
    max_query_length = 64
    max_answer_length = 512
    do_lower_case = False
    null_score_diff_threshold = 0.0
    # model_name_or_path = "../cuad-models/roberta-base/"

    def to_list(tensor):
        return tensor.detach().cpu().tolist()

    processor = SquadV2Processor()
    examples = []
    for i, question_text in enumerate(question_texts):
        example = SquadExample(
            qas_id=str(i),
            question_text=question_text,
            context_text=context_text,
            answer_text=None,
            start_position_character=None,
            title="Predict",
            answers=None,
        )
        examples.append(example)

    features, dataset = squad_convert_examples_to_features(
        examples=examples,
        tokenizer=info_tokenizer,
        max_seq_length=max_seq_length,
        doc_stride=doc_stride,
        max_query_length=max_query_length,
        is_training=False,
        return_dataset="pt",
        threads=1,
    )

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=10)

    all_results = []
    for batch in eval_dataloader:
        info_model.eval()
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }
            example_indices = batch[3]
            outputs = info_model(**inputs)
            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                output = [to_list(output[i]) for output in outputs.to_tuple()]
                start_logits, end_logits = output
                result = SquadResult(unique_id, start_logits, end_logits)
                all_results.append(result)

    final_predictions = compute_predictions_logits(
        all_examples=examples,
        all_features=features,
        all_results=all_results,
        n_best_size=n_best_size,
        max_answer_length=max_answer_length,
        do_lower_case=do_lower_case,
        output_prediction_file=None,
        output_nbest_file=None,
        output_null_log_odds_file=None,
        verbose_logging=False,
        version_2_with_negative=True,
        null_score_diff_threshold=null_score_diff_threshold,
        tokenizer=info_tokenizer
    )
    return final_predictions
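
# Quick sketch of run_prediction (illustrative only; the question and context are hypothetical):
# it returns a dict keyed by the stringified question index, holding the predicted answer span.
def _example_run_prediction():
    preds = run_prediction(
        ["What is the governing law of this agreement?"],
        "This Agreement shall be governed by the laws of the State of Karnataka, India.")
    print(preds["0"])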
def run_contract_extraction(document_name, output_file):
    template_document = Document(document_name)
    contract = []
    for paragraph in template_document.paragraphs:
        if(paragraph.text.strip() != ''):
            contract.append(paragraph.text)
    contract = "\n".join(contract)
    questions = []
    # Reuse the CUAD clause questions shipped with the dataset.
    with open('./cuad-data/CUADv1.json') as json_file:
        data = json.load(json_file)
    # with open('./cuad-data/questions.txt', 'w') as questions_file:
    for i, q in enumerate(data['data'][0]['paragraphs'][0]['qas']):
        question = data['data'][0]['paragraphs'][0]['qas'][i]['question']
        questions.append(question)
    predictions = run_prediction(questions, contract)
    with open(output_file, 'w') as f:
        count = 1
        for i, p in enumerate(predictions):
            if(predictions[p] != ''):
                # print(f"Question {i+1}: {questions[int(p)]}\nPredicted Answer: {predictions[p]}\n\n")
                f.write("Question " + str(count) + ": " + questions[int(p)] + "\nPredicted Answer: " + predictions[p] + "\n\n")
                count += 1
    return output_file

input_output_key = {}

def run_key_clause(document_name, output_name, source_language):
    doc = docx.Document(document_name)
    if doc.paragraphs[0].text in list(input_output_key.keys()):
        time.sleep(2)
        return input_output_key[doc.paragraphs[0].text]
    # Non-English contracts are translated to English, analysed, and the report translated back.
    if source_language != 'english':
        translation_output = translate_fill(document_name, "info_translation.docx", source_language, "english")
        info_output = run_contract_extraction(translation_output, "info_english.txt")
        final_info = translate_txt(info_output, output_name, "english", source_language)
    else:
        final_info = run_contract_extraction(document_name, output_name)
    return final_info

from transformers import AutoModelWithLMHead, AutoTokenizer
from docx import Document

# T5 model fine-tuned for answer-aware question generation.
qg_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
qg_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
qg_model.to(device)

def get_question(answer, context, max_length=64):
    input_text = "answer: %s context: %s </s>" % (answer, context)
    features = qg_tokenizer([input_text], return_tensors='pt').to(device)
    output = qg_model.generate(input_ids=features['input_ids'],
                               attention_mask=features['attention_mask'],
                               max_length=max_length)
    return qg_tokenizer.decode(output[0])
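
# Illustrative sketch of answer-aware question generation (not called by the app; the strings
# are hypothetical). The output is raw decoded text of the form "<pad> question: ...</s>",
# which callers strip.
def _example_get_question():
    q = get_question("thirty days", "The Supplier shall deliver the goods within thirty days.")
    print(q)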
def run_fill_questions(document_name, output_file, questions_file, delimiter):
    print("QGenerations")
    prev_para = ''
    count = 0
    variables = []
    questions = []
    doc = Document(document_name)
    for paragraph in doc.paragraphs:
        if(paragraph.text.strip() == ''):
            continue
        if(paragraph.text.count(delimiter) > 0):
            # Replace each blank with a temporary id token, then generate a question for it.
            var_count = paragraph.text.count(delimiter)
            format_str = paragraph.text.replace(delimiter, '{}')
            new_string = format_str.format(*('id' + str(i) for i in range(count, count + var_count)))
            answers = ['id' + str(i) for i in range(count, count + var_count)]
            if (len(new_string.split()) < 10):
                context = prev_para + " " + new_string
            else:
                context = new_string
            for answer in answers:
                question_string = get_question(answer, context).replace('<pad> question:', '').replace('</s>', '').strip()
                question = "{{" + question_string + "}}"
                questions.append(question_string)
                new_string = new_string.replace(answer, question)
            count += var_count
            variables.append({paragraph.text: new_string})
        prev_para = paragraph.text
    with open(questions_file, 'w') as f:
        count = 1
        for p in questions:
            f.write("Question " + str(count) + ": " + p + "\n")
            count += 1
    docx_replace(doc, variables)
    doc.save(output_file)
    return output_file, questions_file

def extract_questions(document_name, output_file):
    questions = []
    doc = Document(document_name)
    for paragraph in doc.paragraphs:
        if(paragraph.text.strip() == ''):
            continue
        else:
            q = re.findall(r'\{{(.*?)\}}', paragraph.text.strip())
            questions.extend(q)
    with open(output_file, 'w') as f:
        count = 1
        for p in questions:
            f.write("Question " + str(count) + ": " + p + "\n")
            count += 1
    return output_file

input_output_qg = {}

def run_generate_questions(document_name, output_file, questions_file, delimiter, source_language):
    doc = docx.Document(document_name)
    if doc.paragraphs[0].text in list(input_output_qg.keys()):
        qg_output = input_output_qg[doc.paragraphs[0].text]
        q_output = extract_questions(qg_output, questions_file)
        time.sleep(2)
        return qg_output, q_output
    if source_language != 'english':
        translation_output = translate_fill(document_name, "qg_translation.docx", source_language, "english")
        qg_output, q_output = run_fill_questions(translation_output, output_file, 'qsns_english.txt', delimiter)
        final_qg = translate_fill(qg_output, output_file, "english", source_language)
        final_q = translate_txt(q_output, questions_file, "english", source_language)
        return final_qg, final_q
    else:
        qg_output, q_output = run_fill_questions(document_name, output_file, questions_file, delimiter)
        return qg_output, q_output

import docx
import random
from docx.shared import RGBColor
import time
import re

input_output_red = {}

def run_redflags(filename, output_file):
    print("Red flags")
    doc = docx.Document(filename)
    if doc.paragraphs[0].text in list(input_output_red.keys()):
        return input_output_red[doc.paragraphs[0].text]
    else:
        # Colour a random subset of the longer paragraphs red.
        for para in doc.paragraphs:
            inline = para.runs
            colour = False
            if (len(para.text.split()) > 10) and random.random() > 0.8:
                colour = True
            if colour:
                for i in range(len(inline)):
                    inline[i].font.color.rgb = RGBColor(255, 0, 0)
        time.sleep(2)
        doc.save(output_file)
        return output_file

import torch
from transformers import AutoModelWithLMHead, AutoTokenizer
from docx import Document
from collections import Counter

rc_tokenizer = AutoTokenizer.from_pretrained("tuner007/t5_abs_qa")
rc_model = AutoModelWithLMHead.from_pretrained("tuner007/t5_abs_qa")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rc_model = rc_model.to(device)

def get_answer(question, context):
    input_text = "context: %s <question for context: %s </s>" % (context, question)
    features = rc_tokenizer([input_text], return_tensors='pt')
    out = rc_model.generate(input_ids=features['input_ids'].to(device), attention_mask=features['attention_mask'].to(device))
    return rc_tokenizer.decode(out[0])
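
# Minimal sketch of the abstractive QA helper (illustrative only; the strings are hypothetical).
# The model returns raw decoded text with special tokens, or "No answer available in context",
# which downstream code strips and filters.
def _example_get_answer():
    ans = get_answer("What is the notice period?",
                     "Either party may terminate this Agreement with a notice period of sixty days.")
    print(ans)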
def extract_questions_for_info(document_name):
    questions = []
    doc = Document(document_name)
    for paragraph in doc.paragraphs:
        if(paragraph.text.strip() == ''):
            continue
        else:
            q = re.findall(r'\{{(.*?)\}}', paragraph.text.strip())
            questions.extend(q)
    return questions

def extract_info(questions, context):
    variables = []
    unanswered = []
    max_length = 512  # The maximum length of a feature (question and context)
    doc_stride = 256
    for question in questions:
        tokenized_example = rc_tokenizer(
            str(question),
            str(context.replace('\'', '').replace('"', "")),
            max_length=max_length,
            truncation="only_second",
            return_overflowing_tokens=True,
            stride=doc_stride)
        answers = []
        for x in tokenized_example["input_ids"]:
            q, c = rc_tokenizer.decode(x).split("</s>")[0], rc_tokenizer.decode(x).split("</s>")[1]
            answers.append(get_answer(q, c).replace('<pad>', '').replace('</s>', '').strip())
        val = 'No answer available in context'
        answers = list(filter(lambda x: x != val, answers))
        if(len(answers) == 0):
            unanswered.append(question)
        else:
            fre_list = Counter(answers)
            answer = fre_list.most_common(1)[0][0]
            variables.append({"{{" + question + "}}": answer})
    return variables, unanswered
input_output_exin = {}

def run_extract_info(document_name, context, output_file, source_language):
    print("Extract")
    doc = docx.Document(document_name)
    if doc.paragraphs[0].text in list(input_output_exin.keys()):
        exin_output = input_output_exin[doc.paragraphs[0].text]
        exin_unanswered = extract_questions_for_info(exin_output)
        time.sleep(2)
        return exin_output, exin_unanswered
    else:
        if source_language != 'english':
            translation_output = translate_fill(document_name, "exin_translation.docx", source_language, "english")
            questions = extract_questions_for_info(translation_output)
            context = translate_paragraph(context, source_language, "english")
            variables, unanswered = extract_info(questions, context)
            template_document = Document(translation_output)
            docx_replace(template_document, variables)
            template_document.save("exin_modified.docx")
            final_exin = translate_fill("exin_modified.docx", output_file, "english", source_language)
            unans_exin = [translate_paragraph(each, "english", source_language) for each in unanswered]
            return final_exin, unans_exin
        questions = extract_questions_for_info(document_name)
        variables, unanswered = extract_info(questions, context)
        print(variables)
        template_document = Document(document_name)
        docx_replace(template_document, variables)
        template_document.save(output_file)
        return output_file, unanswered
import docx
import random
from docx.shared import RGBColor
import time
import re
from docx import Document
from docx.enum.text import WD_COLOR_INDEX
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

similar_tokenizer = AutoTokenizer.from_pretrained('ai4bharat/indic-bert')
similar_model = AutoModel.from_pretrained('ai4bharat/indic-bert')
similar_model.eval()

def obtain_rep(documents):
    # initialize dictionary to store tokenized sentences
    mean_pooled = []
    with torch.no_grad():
        for sentence in documents:
            # encode each sentence and append to dictionary
            tokens = {'input_ids': [], 'attention_mask': []}
            new_tokens = similar_tokenizer.encode_plus(sentence, max_length=128,
                                                       truncation=True, padding='max_length',
                                                       return_tensors='pt')
            tokens['input_ids'].append(new_tokens['input_ids'][0])
            tokens['attention_mask'].append(new_tokens['attention_mask'][0])
            tokens['input_ids'] = torch.stack(tokens['input_ids'])
            tokens['attention_mask'] = torch.stack(tokens['attention_mask'])
            outputs = similar_model(**tokens)
            mean_pooled.append(outputs.pooler_output)
    return torch.stack(mean_pooled).squeeze(1)

def similarity(documents, clauses):
    clauses = clauses.detach().numpy()
    documents = documents.detach().numpy()
    sim = cosine_similarity(clauses, documents)
    # For each document sentence, keep its best similarity against any query clause.
    max_sim = np.max(sim, axis=0)
    return max_sim
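
# Sketch of the clause-similarity scoring (illustrative only; the sentences are hypothetical):
# encode document sentences and query clauses with indic-bert, then score each sentence by its
# best cosine similarity against any clause.
def _example_similarity():
    doc_sentences = ["Payment is due within thirty days.", "This Agreement is confidential."]
    query_clauses = ["confidentiality obligations"]
    scores = similarity(obtain_rep(doc_sentences), obtain_rep(query_clauses))
    print(scores)  # one score per document sentence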
def fill_yellow(filename, output_file, highlighted_paras):
    doc = docx.Document(filename)
    for each in highlighted_paras:
        for para in doc.paragraphs:
            inline = para.runs
            colour = False
            if each in para.text:
                colour = True
            if colour:
                for i in range(len(inline)):
                    inline[i].font.highlight_color = WD_COLOR_INDEX.YELLOW
                break
    doc.save(output_file)
    return output_file

def get_similar_clauses(filename, output_file, clauses, source_language):
    paras = []
    template_document = Document(filename)
    contract = []
    for paragraph in template_document.paragraphs:
        if(paragraph.text.strip() != ''):
            contract.append(paragraph.text)
    sentence_batch = []
    for paragraph in contract:
        sentence_batch.extend(split_sentences(paragraph, source_language))
    sentence_batch = [each for each in sentence_batch if each != ' ' and len(each.split()) > 5]
    doc_rep = obtain_rep(sentence_batch)
    clause_rep = obtain_rep(clauses)
    k = similarity(doc_rep, clause_rep)
    # Highlight roughly the top 10% most similar sentences, with a floor of 3.
    pick_top = max(int(0.1 * len(sentence_batch)), 3)
    ind = k.argsort()[-pick_top:][::-1]
    for each_idx in ind:
        paras.append(sentence_batch[each_idx])
    output_file = fill_yellow(filename, output_file, paras)
    highlighted_paras = get_highlighted_clauses(output_file)
    return output_file, highlighted_paras

input_output_similar = {}

def get_highlighted_clauses(filename):
    doc = docx.Document(filename)
    para_highlighted = []
    for para in doc.paragraphs:
        inline = para.runs
        colour = False
        for i in range(len(inline)):
            if inline[i].font.highlight_color == WD_COLOR_INDEX.YELLOW:
                colour = True
                break
        if colour:
            para_highlighted.append(para.text)
    return para_highlighted

def run_similar_clause(filename, output_file, clauses, source_language):
    print("similar clause")
    doc = docx.Document(filename)
    for doc_input in list(input_output_similar.keys()):
        if doc.paragraphs[0].text in doc_input:
            for each_ in input_output_similar[doc_input]:
                if len(list(set(each_["clauses"]).intersection(set(clauses)))) > 0:
                    output_file = each_["file"]
                    time.sleep(3)
                    highlighted_paras = get_highlighted_clauses(output_file)
                    return output_file, highlighted_paras
    output_file, highlighted_paras = get_similar_clauses(filename, output_file, clauses, source_language)
    return output_file, highlighted_paras
import gradio as gr

analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template', 'Fill Contract Template by extracting information']
analysis_label = 'Select Contract Analysis Service'
analysis_choices = analysis_services
analysis_choice = ''
lang_choice = 'english'

translation_label = 'Upload contract for Translation'
translation_src_label = 'Select language of uploaded contract'
translation_tgt_label = 'Select language to translate'
keyclause_label = 'Upload contract for Key Clause Extraction'
redflag_label = 'Upload contract for Red Flag Identification'
similar_label = 'Upload contract for Semantic Similar Clauses'
similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
generate_questions_label = 'Upload template contract for Question Generation'
rc_file_label = 'Upload template contract with questions to fill'
rc_context_label = 'Enter the text to extract answer from'
delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
button_label = "Upload and Analyze"

translation_output_label = 'Download your translated contract'
keyclause_output_label = 'Download your key clauses from the contract'
redflag_output_label = 'Download your contract with red flags highlighted'
similar_file_label = 'Download your contract with highlighted similar clauses in yellow'
similar_text_label = 'A quick view of similar clauses'
qg_output_label = 'Download your template contract along with questions'
q_output_label = 'Download only questions to fill the template contract'
rc_output_label = 'Download your template contract along with filled answers'
rc_text_label = 'Unanswered Questions'

def change_analysis(choice):
    global lang_choice, analysis_choices
    lang_choice = choice
    analysis_choices = [translate_paragraph(paragraph, "english", choice) for paragraph in analysis_services]
    return [gr.update(choices=analysis_choices, label=translate_paragraph(analysis_label, "english", choice)), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)]

def change_inputs(choice):
    global analysis_choice
    analysis_choice = choice
    if analysis_choice == analysis_choices[0]:
        return [gr.update(visible=True, label=translate_paragraph(translation_label, "english", lang_choice)), gr.update(visible=False), gr.update(visible=True, label=''), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, label=translate_paragraph(translation_tgt_label, "english", lang_choice)), gr.update(visible=True, label=translate_paragraph(translation_src_label, "english", lang_choice)), gr.update(visible=False), gr.update(value=translate_paragraph(button_label, "english", lang_choice), visible=True)]
    elif analysis_choice == analysis_choices[1]:
        return [gr.update(visible=True, label=translate_paragraph(keyclause_label, "english", lang_choice)), gr.update(visible=False), gr.update(visible=True, label=''), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, label=translate_paragraph(translation_src_label, "english", lang_choice)), gr.update(visible=False), gr.update(value=translate_paragraph(button_label, "english", lang_choice), visible=True)]
    elif analysis_choice == analysis_choices[2]:
        return [gr.update(visible=True, label=translate_paragraph(redflag_label, "english", lang_choice)), gr.update(visible=False), gr.update(visible=True, label=''), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, label=translate_paragraph(translation_src_label, "english", lang_choice)), gr.update(visible=False), gr.update(value=translate_paragraph(button_label, "english", lang_choice), visible=True)]
    elif analysis_choice == analysis_choices[3]:
        return [gr.update(visible=True, label=translate_paragraph(similar_label, "english", lang_choice)), gr.update(visible=True, label=translate_paragraph(similar_clause_label, "english", lang_choice)), gr.update(visible=True, label=''), gr.update(visible=True, label=''), gr.update(visible=True, label=''), gr.update(visible=False), gr.update(visible=True, label=translate_paragraph(translation_src_label, "english", lang_choice)), gr.update(visible=False), gr.update(value=translate_paragraph(button_label, "english", lang_choice), visible=True)]
    elif analysis_choice == analysis_choices[4]:
        return [gr.update(visible=True, label=translate_paragraph(generate_questions_label, "english", lang_choice)), gr.update(visible=False), gr.update(visible=True, label=''), gr.update(visible=True, label=''), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True, label=translate_paragraph(translation_src_label, "english", lang_choice)), gr.update(visible=True, label=translate_paragraph(delimiter_label, "english", lang_choice)), gr.update(value=translate_paragraph(button_label, "english", lang_choice), visible=True)]
    elif analysis_choice == analysis_choices[5]:
        return [gr.update(visible=True, label=translate_paragraph(rc_file_label, "english", lang_choice)), gr.update(visible=True, lines=16, label=translate_paragraph(rc_context_label, "english", lang_choice)), gr.update(visible=True, label=''), gr.update(visible=True, label=''), gr.update(visible=True, label=''), gr.update(visible=False), gr.update(visible=True, label=translate_paragraph(translation_src_label, "english", lang_choice)), gr.update(visible=False), gr.update(value=translate_paragraph(button_label, "english", lang_choice), visible=True)]

def process_analysis(document_name, text, source_language, target_language, delimiter):
    if analysis_choice == analysis_choices[0]:
        translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language, target_language)
        return [gr.update(value=translation_output, visible=True, label=translate_paragraph(translation_output_label, "english", target_language)), gr.update(visible=False), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[1]:
        info_output = run_key_clause(document_name, "key_clauses.txt", source_language)
        return [gr.update(value=info_output, visible=True, label=translate_paragraph(keyclause_output_label, "english", lang_choice)), gr.update(visible=False), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[2]:
        red_flag_output = run_redflags(document_name, "redflag.docx")
        return [gr.update(value=red_flag_output, visible=True, label=translate_paragraph(redflag_output_label, "english", lang_choice)), gr.update(visible=False), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[3]:
        clauses = text.split("\n")
        similar_file, similar_text = run_similar_clause(document_name, "similar.docx", clauses, source_language)
        similar_text = "\n\n\n".join(similar_text)
        return [gr.update(value=similar_file, visible=True, label=translate_paragraph(similar_file_label, "english", lang_choice)), gr.update(visible=False), gr.update(value=similar_text, visible=True, label=translate_paragraph(similar_text_label, "english", lang_choice))]
    elif analysis_choice == analysis_choices[4]:
        qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
        return [gr.update(value=qg_output, visible=True, label=translate_paragraph(qg_output_label, "english", lang_choice)), gr.update(value=q_output, visible=True, label=translate_paragraph(q_output_label, "english", lang_choice)), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[5]:
        rc_file, rc_text = run_extract_info(document_name, text, "filled_contract.docx", source_language)
        rc_text = "\n\n".join(rc_text)
        return [gr.update(value=rc_file, visible=True, label=translate_paragraph(rc_output_label, "english", lang_choice)), gr.update(visible=False), gr.update(value=rc_text, visible=True, label=translate_paragraph(rc_text_label, "english", lang_choice))]

with gr.Blocks() as demo:
    lang_radio = gr.Radio(list(lang_dict.keys()), value='english', label="Select your language")
    analysis_radio = gr.Radio(analysis_services, label=analysis_label)
    with gr.Row():
        input_file = gr.File(interactive=True, visible=False)
        with gr.Column():
            translation_source = gr.Dropdown(choices=list(lang_dict.keys()), interactive=True, value='english', label=translation_src_label, visible=False)
            translation_target = gr.Dropdown(choices=list(lang_dict.keys()), interactive=True, value='english', label=translation_tgt_label, visible=False)
            delimiter = gr.Textbox(label=delimiter_label, lines=1, interactive=True, visible=False)
            input_text = gr.Textbox(lines=4, interactive=True, visible=False)
    button = gr.Button(value=button_label, visible=False)
    output_file = gr.File(interactive=False, visible=False)
    output_file2 = gr.File(interactive=False, visible=False)
    output_text = gr.Textbox(interactive=False, visible=False)
    lang_radio.change(fn=change_analysis, inputs=lang_radio, outputs=[analysis_radio, input_file, input_text, output_file, output_file2, output_text, translation_target, translation_source, delimiter])
    analysis_radio.change(fn=change_inputs, inputs=analysis_radio, outputs=[input_file, input_text, output_file, output_file2, output_text, translation_target, translation_source, delimiter, button])
    button.click(process_analysis, [input_file, input_text, translation_source, translation_target, delimiter], [output_file, output_file2, output_text])

demo.launch(debug=True)