""" OCR Text Detection App ====================== Gradio app for OCR text extraction Features: File upload, URL upload, demo images, confidence filtering """ import cv2 import easyocr import numpy as np from pathlib import Path import gradio as gr import urllib.request import tempfile import os import warnings warnings.filterwarnings('ignore') # Initialize EasyOCR reader once (reused for all images) reader = easyocr.Reader(['en'], gpu=False, verbose=False) def format_text_aligned(results): """Format OCR results by grouping text by Y-coordinate (lines) and sorting by X (left-to-right).""" if not results: return "" # Extract Y-center and X-min for each detection detections = [(sum(p[1] for p in bbox) / len(bbox), min(p[0] for p in bbox), text) for bbox, text, _ in results] if not detections: return "" # Calculate threshold to group detections on same line (30% of avg line spacing) y_coords = [d[0] for d in detections] y_threshold = (max(y_coords) - min(y_coords)) / len(set(int(y) for y in y_coords)) * 0.3 # Sort by Y (top to bottom), then X (left to right) detections.sort(key=lambda x: (x[0], x[1])) lines, current_line, current_y = [], [], detections[0][0] if detections else 0 # Group detections by similar Y coordinates into lines for y, x, text in detections: if abs(y - current_y) <= y_threshold: current_line.append((x, text)) else: if current_line: lines.append(' '.join([t[1] for t in sorted(current_line, key=lambda x: x[0])])) current_line, current_y = [(x, text)], y if current_line: lines.append(' '.join([t[1] for t in sorted(current_line, key=lambda x: x[0])])) return '\n'.join(lines) def process_ocr(input_image, confidence_threshold=0.0): """Process image with OCR and return annotated image + formatted text.""" if input_image is None: return None, "" # Convert RGB to BGR for OpenCV image_bgr = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR) # Perform OCR results = reader.readtext(image_bgr) # Filter by confidence threshold filtered_results = [(bbox, text, conf) for bbox, text, conf in results if conf >= confidence_threshold] formatted_text = format_text_aligned(filtered_results) # Draw bounding boxes and labels on image annotated_image = image_bgr.copy() for bbox, text, confidence in filtered_results: # Draw bounding box polygon bbox_points = np.array([[int(p[0]), int(p[1])] for p in bbox], dtype=np.int32) cv2.polylines(annotated_image, [bbox_points], isClosed=True, color=(0, 255, 0), thickness=2) # Calculate position for text label x_min, y_min = int(min(p[0] for p in bbox)), int(min(p[1] for p in bbox)) label = f"{text} ({confidence:.2f})" (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1) # Position text above or below box based on Y position text_y = y_min - 5 if y_min > 20 else y_min + 20 # Draw background rectangle and text cv2.rectangle(annotated_image, (x_min - 2, text_y - h - 2), (x_min + w + 2, text_y + 2), (0, 255, 0), -1) cv2.putText(annotated_image, label, (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1) return cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB), formatted_text or "" # Load sample images for demo gallery exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif') sample_images = sorted([str(f) for f in Path('images').iterdir() if f.suffix.lower() in exts])[:3] # CSS for professional styling css = """ .gradio-container {font-family: 'Segoe UI', sans-serif; max-width: 1400px; margin: 0 auto; overflow-x: hidden;} body, html {overflow-x: hidden; scrollbar-width: none;} ::-webkit-scrollbar {display: none;} h1 {text-align: center; color: 


def process_ocr(input_image, confidence_threshold=0.0):
    """Run OCR on an image and return (annotated RGB image, formatted text)."""
    if input_image is None:
        return None, ""

    # Convert RGB (Gradio) to BGR (OpenCV)
    image_bgr = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    # Perform OCR
    results = reader.readtext(image_bgr)

    # Keep only detections at or above the confidence threshold
    filtered_results = [(bbox, text, conf) for bbox, text, conf in results
                        if conf >= confidence_threshold]
    formatted_text = format_text_aligned(filtered_results)

    # Draw bounding boxes and labels on the image
    annotated_image = image_bgr.copy()
    for bbox, text, confidence in filtered_results:
        # Draw the bounding box polygon
        bbox_points = np.array([[int(p[0]), int(p[1])] for p in bbox], dtype=np.int32)
        cv2.polylines(annotated_image, [bbox_points], isClosed=True,
                      color=(0, 255, 0), thickness=2)

        # Compute the label position from the box's top-left corner
        x_min, y_min = int(min(p[0] for p in bbox)), int(min(p[1] for p in bbox))
        label = f"{text} ({confidence:.2f})"
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)

        # Place the label above the box, or below it when too close to the top edge
        text_y = y_min - 5 if y_min > 20 else y_min + 20

        # Draw a filled background rectangle, then the label text
        cv2.rectangle(annotated_image, (x_min - 2, text_y - h - 2),
                      (x_min + w + 2, text_y + 2), (0, 255, 0), -1)
        cv2.putText(annotated_image, label, (x_min, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)

    return cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB), formatted_text or ""


# Load sample images for the demo gallery (empty list if images/ is missing)
exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif')
images_dir = Path('images')
sample_images = (sorted(str(f) for f in images_dir.iterdir() if f.suffix.lower() in exts)[:3]
                 if images_dir.is_dir() else [])

# CSS for professional styling
css = """
.gradio-container {font-family: 'Segoe UI', sans-serif; max-width: 1400px; margin: 0 auto; overflow-x: hidden;}
body, html {overflow-x: hidden; scrollbar-width: none;}
::-webkit-scrollbar {display: none;}
h1 {text-align: center; color: #042AFF; margin-bottom: 1rem; font-size: 2.5rem; font-weight: bold; letter-spacing: -0.5px;}
.description {text-align: center; color: #6b7280; margin-bottom: 0.3rem; font-size: 1.05rem; line-height: 1.6;}
.credits {text-align: center; color: #f2faf4; margin-bottom: 2rem; margin-top: 0; font-size: 1rem;}
.credits a {color: #042AFF; text-decoration: none; font-weight: bold; transition: color 0.3s ease;}
.credits a:hover {color: #111F68; text-decoration: underline;}
"""

# Create the Gradio interface
with gr.Blocks(title="OCR Text Detection", theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown("# 📄 OCR Text Detection")
    gr.Markdown(
        "Extract text from images with bounding boxes and confidence scores. "
        "Upload an image or select a demo image to get started.",
        elem_classes=["description"],
    )
    gr.Markdown("Made by Techtics.ai", elem_classes=["credits"])

    # Main layout: two columns
    with gr.Row():
        # Column 1: upload area with tabs
        with gr.Column(scale=1):
            with gr.Tabs():
                with gr.Tab("Upload File"):
                    image_input = gr.Image(label="Upload Image", type="numpy", height=400)
                with gr.Tab("Image by URL"):
                    url_input = gr.Textbox(label="Image URL",
                                           placeholder="Enter image URL (jpg, png, etc.)",
                                           lines=1)
                    url_btn = gr.Button("Load Image from URL", variant="primary")

            # Demo images gallery
            if sample_images:
                gr.Markdown("### Demo Images (Click to load)")
                demo_gallery = gr.Gallery(value=sample_images, columns=3, rows=1,
                                          height="40px", show_label=False, container=False,
                                          allow_preview=False, object_fit="contain")

        # Column 2: processed image and confidence slider
        with gr.Column(scale=1):
            gr.Markdown("### Processed Image")
            annotated_output = gr.Image(label="", type="numpy", height=400, visible=True)
            confidence_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.05,
                                          label="Confidence Threshold",
                                          info="Filter detections by minimum confidence score")

    # Text output below both columns (full width, hidden until processing)
    text_output = gr.Textbox(label="Extracted Text", value="",
                             placeholder="Extracted text will appear here after processing...",
                             lines=12, interactive=True, show_copy_button=True, visible=False)

    def load_from_url(url):
        """Download an image from a URL and return it as an RGB array."""
        if not url or not url.strip():
            return None
        try:
            req = urllib.request.Request(url.strip(), headers={'User-Agent': 'Mozilla/5.0'})
            with urllib.request.urlopen(req, timeout=10) as response:
                with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp_file:
                    tmp_file.write(response.read())
                    tmp_path = tmp_file.name
            img = cv2.imread(tmp_path)
            os.unlink(tmp_path)
            return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) if img is not None else None
        except Exception:
            return None

    def load_from_gallery(evt: gr.SelectData):
        """Load a demo image when its gallery thumbnail is clicked."""
        if evt.index < len(sample_images):
            img = cv2.imread(sample_images[evt.index])
            return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) if img is not None else None
        return None

    # Event handlers
    url_btn.click(fn=load_from_url, inputs=url_input, outputs=image_input)
    url_input.submit(fn=load_from_url, inputs=url_input, outputs=image_input)
    if sample_images:
        demo_gallery.select(fn=load_from_gallery, outputs=image_input)

    # Re-run OCR whenever the image or the confidence slider changes
    def on_change(img, conf_thresh):
        """Process the image and update the annotated image and text output."""
        if img is None:
            return gr.update(visible=True, value=None), gr.update(visible=False, value="")
        annot, text = process_ocr(img, conf_thresh)
        return gr.update(visible=True, value=annot), gr.update(visible=True, value=text or "")

    image_input.change(fn=on_change, inputs=[image_input, confidence_slider],
                       outputs=[annotated_output, text_output])
    confidence_slider.change(fn=on_change, inputs=[image_input, confidence_slider],
                             outputs=[annotated_output, text_output])

if __name__ == "__main__":
    # Open http://localhost:7860/ to access the app
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)