| """ | |
| OCR Text Detection App | |
| ====================== | |
| Gradio app for OCR text extraction | |
| Features: File upload, URL upload, demo images, confidence filtering | |
| """ | |
| import cv2 | |
| import easyocr | |
| import numpy as np | |
| from pathlib import Path | |
| import gradio as gr | |
| import urllib.request | |
| import tempfile | |
| import os | |
| import warnings | |
| from PIL import Image | |
| warnings.filterwarnings('ignore') | |
# Initialize EasyOCR reader once (reused for all images)
reader = easyocr.Reader(['en'], gpu=False, verbose=False)
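# Note: EasyOCR downloads its detection/recognition models on first use if they
# are not already cached, so the first startup can take noticeably longer.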

def format_text_aligned(results):
    """Format OCR results by grouping text by Y-coordinate (lines) and sorting by X (left-to-right)."""
    if not results:
        return ""
    # Extract Y-center and X-min for each detection
    detections = [(sum(p[1] for p in bbox) / len(bbox), min(p[0] for p in bbox), text) for bbox, text, _ in results]
    if not detections:
        return ""
    # Calculate threshold to group detections on same line (30% of avg line spacing)
    y_coords = [d[0] for d in detections]
    y_threshold = (max(y_coords) - min(y_coords)) / len(set(int(y) for y in y_coords)) * 0.3
    # Sort by Y (top to bottom), then X (left to right)
    detections.sort(key=lambda x: (x[0], x[1]))
    lines, current_line, current_y = [], [], detections[0][0] if detections else 0
    # Group detections by similar Y coordinates into lines
    for y, x, text in detections:
        if abs(y - current_y) <= y_threshold:
            current_line.append((x, text))
        else:
            if current_line:
                lines.append(' '.join([t[1] for t in sorted(current_line, key=lambda x: x[0])]))
            current_line, current_y = [(x, text)], y
    if current_line:
        lines.append(' '.join([t[1] for t in sorted(current_line, key=lambda x: x[0])]))
    return '\n'.join(lines)
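
# Illustrative example of the result format EasyOCR's readtext() returns and the
# helper above expects (values invented for demonstration): each entry is
# (bbox corner points, text, confidence). With the grouping above, the first two
# detections share a line and the third starts a new one, so
# format_text_aligned(_example) would return "Hello world\nLine2".
# _example = [
#     ([[10, 10], [90, 10], [90, 30], [10, 30]], "Hello", 0.98),
#     ([[100, 12], [160, 12], [160, 32], [100, 32]], "world", 0.95),
#     ([[10, 50], [80, 50], [80, 70], [10, 70]], "Line2", 0.90),
# ]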

def process_ocr(input_image, confidence_threshold=0.0):
    """Process image with OCR and return annotated image + formatted text."""
    if input_image is None:
        return None, ""
    # Convert PIL Image to an RGB numpy array (handles RGBA/grayscale), then to BGR for OpenCV
    if isinstance(input_image, Image.Image):
        input_image = np.array(input_image.convert("RGB"))
    image_bgr = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
    # Perform OCR
    results = reader.readtext(image_bgr)
    # Filter by confidence threshold
    filtered_results = [(bbox, text, conf) for bbox, text, conf in results if conf >= confidence_threshold]
    formatted_text = format_text_aligned(filtered_results)
    # Draw bounding boxes and labels on image
    annotated_image = image_bgr.copy()
    for bbox, text, confidence in filtered_results:
        # Draw bounding box polygon
        bbox_points = np.array([[int(p[0]), int(p[1])] for p in bbox], dtype=np.int32)
        cv2.polylines(annotated_image, [bbox_points], isClosed=True, color=(0, 255, 0), thickness=2)
        # Calculate position for text label (label size scales with bounding box size)
        x_min, y_min = int(min(p[0] for p in bbox)), int(min(p[1] for p in bbox))
        x_max, y_max = int(max(p[0] for p in bbox)), int(max(p[1] for p in bbox))
        bbox_width = x_max - x_min
        bbox_height = y_max - y_min
        # Scale font size based on bounding box dimensions
        font_scale = min(bbox_width, bbox_height) / 100.0
        font_scale = max(0.3, min(font_scale, 1.5))  # Clamp between 0.3 and 1.5
        thickness = max(1, int(font_scale * 2))
        label = f"{text} ({confidence:.2f})"
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        # Position label above the box, or just below its top edge if the box is near the image top
        text_y = y_min - 5 if y_min > 20 else y_min + 20
        # Draw background rectangle and text
        cv2.rectangle(annotated_image, (x_min - 2, text_y - h - 2), (x_min + w + 2, text_y + 2), (0, 255, 0), -1)
        cv2.putText(annotated_image, label, (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
    # Convert back to RGB and then to PIL Image for Gradio 5.x compatibility
    output_rgb = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
    output_pil = Image.fromarray(output_rgb)
    return output_pil, formatted_text or ""

# Load sample images for the demo gallery (empty if the images/ directory is missing)
exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif')
images_dir = Path('images')
sample_images = sorted(str(f) for f in images_dir.iterdir() if f.suffix.lower() in exts)[:3] if images_dir.is_dir() else []
# CSS for professional styling
css = """
.gradio-container {font-family: 'Segoe UI', sans-serif; max-width: 1400px; margin: 0 auto; overflow-x: hidden;}
body, html {overflow-x: hidden; scrollbar-width: none;}
::-webkit-scrollbar {display: none;}
h1 {text-align: center; color: #042AFF; margin-bottom: 1rem; font-size: 2.5rem; font-weight: bold; letter-spacing: -0.5px;}
.description {text-align: center; color: #6b7280; margin-bottom: 0.3rem; font-size: 1.05rem; line-height: 1.6;}
.credits {text-align: center; color: #f2faf4; margin-bottom: 2rem; margin-top: 0; font-size: 1rem;}
.credits a {color: #042AFF; text-decoration: none; font-weight: bold; transition: color 0.3s ease;}
.credits a:hover {color: #111F68; text-decoration: underline;}
"""
# Create Gradio interface
with gr.Blocks(title="OCR Text Detection", theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown("# OCR Text Detection")
    gr.Markdown("<div class='description'>Extract text from images with bounding boxes and confidence scores. Upload an image or select a demo image to get started.</div>", elem_classes=["description"])
    gr.Markdown("<div class='credits' style='text-align: center;'>Made by <a href='https://techtics.ai' target='_blank' style='color: #042AFF; text-decoration: none; font-weight: bold;'>Techtics.ai</a></div>", elem_classes=["credits"])

    # Main layout: two columns
    with gr.Row():
        # Column 1: upload area with tabs
        with gr.Column(scale=1):
            with gr.Tabs():
                with gr.Tab("Upload File"):
                    image_input = gr.Image(label="Upload Image", height=400, show_download_button=True, show_share_button=False, show_fullscreen_button=True)
                with gr.Tab("Image by URL"):
                    url_input = gr.Textbox(label="Image URL", placeholder="Enter image URL (jpg, png, etc.)", lines=1)
                    url_btn = gr.Button("Load Image from URL", variant="primary")
            # Demo images gallery
            if sample_images:
                gr.Markdown("### Demo Images (Click to load)")
                demo_gallery = gr.Gallery(value=sample_images, columns=3, rows=1, height=210, show_label=False, container=True, preview=False, object_fit="scale-down")

        # Column 2: processed image and confidence slider
        with gr.Column(scale=1):
            gr.Markdown("### Processed Image")
            annotated_output = gr.Image(label="", height=400, visible=True, show_download_button=True, show_share_button=False, show_fullscreen_button=True)
            confidence_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.05, label="Confidence Threshold", info="Filter detections by minimum confidence score")

    # Text output below both columns (full width, hidden until processing)
    text_output = gr.Textbox(label="Extracted Text", value="", placeholder="Extracted text will appear here after processing...", lines=12, interactive=True, show_copy_button=True, visible=False)
    # Load image from URL
    def load_from_url(url):
        """Download and load image from URL."""
        if not url or not url.strip():
            return None
        try:
            req = urllib.request.Request(url.strip(), headers={'User-Agent': 'Mozilla/5.0'})
            with urllib.request.urlopen(req, timeout=10) as response:
                with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp_file:
                    tmp_file.write(response.read())
                    tmp_path = tmp_file.name
            img = Image.open(tmp_path)
            img.load()  # Force PIL to read the data before the temp file is removed
            os.unlink(tmp_path)
            return img
        except Exception:
            return None
    # Load demo image from gallery
    def load_from_gallery(evt: gr.SelectData):
        """Load demo image when clicked."""
        if evt.index < len(sample_images):
            return Image.open(sample_images[evt.index])
        return None

    # Event handlers
    url_btn.click(fn=load_from_url, inputs=url_input, outputs=image_input)
    url_input.submit(fn=load_from_url, inputs=url_input, outputs=image_input)
    if sample_images:
        demo_gallery.select(fn=load_from_gallery, outputs=image_input)
    # Process image when it changes or confidence slider changes
    def on_change(img, conf_thresh):
        """Process image and update annotated image + text output."""
        if img is None:
            return None, gr.update(visible=False, value="")
        annot, text = process_ocr(img, conf_thresh)
        return annot, gr.update(visible=True, value=text or "")

    # Process image when it changes - store event to allow cancellation
    process_event = image_input.change(
        fn=on_change,
        inputs=[image_input, confidence_slider],
        outputs=[annotated_output, text_output]
    )
    # Confidence slider cancels previous processing to avoid queue buildup
    confidence_slider.change(
        fn=on_change,
        inputs=[image_input, confidence_slider],
        outputs=[annotated_output, text_output],
        cancels=[process_event]  # Cancel previous image processing
    )

if __name__ == "__main__":
    demo.launch()
    # For local testing, use: demo.launch(share=True, server_name="0.0.0.0", server_port=7860)
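
# Runtime dependencies implied by the imports above (a sketch; exact package
# choices such as opencv-python vs. opencv-python-headless are assumptions,
# and versions are left unpinned):
#   gradio, easyocr, opencv-python, numpy, pillow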