Image_to_text / app.py
pratyyush's picture
Update app.py
24f3c38 verified
import gradio as gr
from doctr.models import ocr_predictor
import numpy as np
# Build the docTR OCR pipeline once at import time; the extraction function
# below reuses this single instance for every call.
model = ocr_predictor(
    det_arch='db_resnet50',
    reco_arch='crnn_vgg16_bn',
    pretrained=True,
)
# Default x-positions at which one column ends and the next begins, for the
# six-column layout Name | Code | Statement | Group | Sub-Group | Normally.
# They are compared against word geometry as returned by the OCR export
# (presumably fractions of the page width -- confirm against docTR docs).
_DEFAULT_COL_BOUNDS = (0.28, 0.35, 0.48, 0.62, 0.88)

# Extra vertical slack allowed between the bottom of one word and the top of
# the next before a new table row is started (same units as the geometry).
_ROW_GAP_TOLERANCE = 0.01


def _collect_words(page):
    """Flatten one exported page into a list of word records with geometry."""
    words = []
    for block in page['blocks']:
        for line in block['lines']:
            for word in line['words']:
                top_left, bottom_right = word['geometry'][0], word['geometry'][1]
                y_top, y_bot = top_left[1], bottom_right[1]
                words.append({
                    'text': word['value'],
                    'y_top': y_top,
                    'y_bot': y_bot,
                    'y_mid': (y_top + y_bot) / 2,
                    'x_mid': (top_left[0] + bottom_right[0]) / 2,
                })
    return words


def _group_rows(words):
    """Split words into table rows by chaining vertically adjacent words."""
    rows = []
    for word in sorted(words, key=lambda w: w['y_mid']):
        # A word whose top lies above (previous word's bottom + tolerance)
        # still belongs to the current row; this is what pulls wrapped cell
        # text onto a single output line.
        if rows and word['y_top'] < rows[-1][-1]['y_bot'] + _ROW_GAP_TOLERANCE:
            rows[-1].append(word)
        else:
            rows.append([word])
    return rows


def _reading_order(words):
    """Return the words of one cell ordered line by line, left to right."""
    lines = []
    for word in sorted(words, key=lambda w: w['y_mid']):
        # A word sits on the current text line when its vertical midpoint is
        # still above the bottom of that line's first (top-most) word.
        if lines and word['y_mid'] < lines[-1][0]['y_bot']:
            lines[-1].append(word)
        else:
            lines.append([word])
    ordered = []
    for line in lines:
        ordered.extend(sorted(line, key=lambda w: w['x_mid']))
    return ordered


def extract_word_ready_table(image, col_bounds=_DEFAULT_COL_BOUNDS, predictor=None) -> str:
    """Run OCR on an image and return its table as pipe-separated lines.

    Every detected word is assigned to a table row (by vertical position) and
    to a column (by the horizontal midpoint of its box). Text that wraps over
    several lines inside one row is merged into a single output line.

    Args:
        image: The uploaded image (anything ``np.array`` accepts), or ``None``.
        col_bounds: Ascending x-positions separating the columns; ``n``
            boundaries produce ``n + 1`` columns. A word whose midpoint equals
            a boundary goes to the column on the right of it.
        predictor: Callable taking a list of image arrays and returning an
            object with an ``export()`` method. Defaults to the module-level
            ``model``.

    Returns:
        One ``| a | b | ... |`` line per non-empty row, joined by newlines
        (an empty string if no text was found), or the message
        ``"Please upload an image."`` when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload an image."

    if predictor is None:
        predictor = model
    exported = predictor([np.array(image)]).export()

    bounds = sorted(col_bounds)
    markdown_rows = []
    for page in exported['pages']:
        for row in _group_rows(_collect_words(page)):
            cells = [[] for _ in range(len(bounds) + 1)]
            for word in row:
                # Column index = number of boundaries at or left of the word.
                column = sum(word['x_mid'] >= bound for bound in bounds)
                cells[column].append(word)

            # Rows are built in y-order, so words sharing a printed line can
            # arrive shuffled by tiny box-height differences; restore the
            # left-to-right reading order per cell before joining.
            texts = [
                " ".join(w['text'] for w in _reading_order(cell)).strip()
                for cell in cells
            ]
            if any(texts):
                # The pipe (|) is the only separator, for Word's
                # "Convert Text to Table".
                markdown_rows.append("| " + " | ".join(texts) + " |")

    return "\n".join(markdown_rows)
# Gradio UI: image upload on the left, extracted pipe-separated table and a
# copy-to-clipboard button on the right.
with gr.Blocks() as demo:
    gr.Markdown("## 📑 Word-Ready Accountancy Extractor")
    gr.Markdown("Forces wrapped text into a single line to prevent Word from merging cells incorrectly.")
    with gr.Row():
        with gr.Column():
            img_in = gr.Image(type="pil")
            btn = gr.Button("Extract for Word", variant="primary")
        with gr.Column():
            out = gr.Textbox(label="Result (One Line Per Row)", lines=25, elem_id="out_box")
            copy_btn = gr.Button("📋 Copy Table")
    # Client-side only handler (no Python function). The selector must match
    # the elem_id given to the result textbox above ("out_box"); a mismatched
    # id makes querySelector return null and the copy fail.
    copy_btn.click(None, None, None, js="""
    () => {
        const text = document.querySelector('#out_box textarea').value;
        navigator.clipboard.writeText(text);
        alert('Copied! Now use Insert > Table > Convert Text to Table in Word.');
    }
    """)
    # Run OCR on the uploaded image and show the table text in the textbox.
    btn.click(extract_word_ready_table, inputs=img_in, outputs=out)
demo.launch()