Spaces:

mateenahmed
/

OCR

Running

App Files Files Community

github-actions[bot] committed on Nov 13

Commit

ae15dbe

1 Parent(s): 1d0d685

🚀 Automated OCR deployment from GitHub Actions

Browse files

Files changed (8) hide show

.gitattributes +3 -35
=0.1.4 +55 -0
README.md +69 -8
app.py +186 -0
images/card.jpg +3 -0
images/demo.png +3 -0
images/google.png +3 -0
requirements.txt +0 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,3 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+images/*.png filter=lfs diff=lfs merge=lfs -text
+images/*.jpg filter=lfs diff=lfs merge=lfs -text
+images/*.jpeg filter=lfs diff=lfs merge=lfs -text

=0.1.4 ADDED Viewed

	@@ -0,0 +1,55 @@

+Collecting huggingface-hub>=0.30
+  Downloading huggingface_hub-1.1.3-py3-none-any.whl.metadata (13 kB)
+Collecting hf-transfer
+  Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)
+Requirement already satisfied: filelock in /opt/hostedtoolcache/Python/3.10.19/x64/lib/python3.10/site-packages (from huggingface-hub>=0.30) (3.20.0)
+Requirement already satisfied: fsspec>=2023.5.0 in /opt/hostedtoolcache/Python/3.10.19/x64/lib/python3.10/site-packages (from huggingface-hub>=0.30) (2025.10.0)
+Collecting hf-xet<2.0.0,>=1.2.0 (from huggingface-hub>=0.30)
+  Downloading hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
+Collecting httpx<1,>=0.23.0 (from huggingface-hub>=0.30)
+  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
+Requirement already satisfied: packaging>=20.9 in /opt/hostedtoolcache/Python/3.10.19/x64/lib/python3.10/site-packages (from huggingface-hub>=0.30) (25.0)
+Requirement already satisfied: pyyaml>=5.1 in /opt/hostedtoolcache/Python/3.10.19/x64/lib/python3.10/site-packages (from huggingface-hub>=0.30) (6.0.3)
+Collecting shellingham (from huggingface-hub>=0.30)
+  Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)
+Collecting tqdm>=4.42.1 (from huggingface-hub>=0.30)
+  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
+Collecting typer-slim (from huggingface-hub>=0.30)
+  Downloading typer_slim-0.20.0-py3-none-any.whl.metadata (16 kB)
+Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/hostedtoolcache/Python/3.10.19/x64/lib/python3.10/site-packages (from huggingface-hub>=0.30) (4.15.0)
+Collecting anyio (from httpx<1,>=0.23.0->huggingface-hub>=0.30)
+  Downloading anyio-4.11.0-py3-none-any.whl.metadata (4.1 kB)
+Collecting certifi (from httpx<1,>=0.23.0->huggingface-hub>=0.30)
+  Downloading certifi-2025.11.12-py3-none-any.whl.metadata (2.5 kB)
+Collecting httpcore==1.* (from httpx<1,>=0.23.0->huggingface-hub>=0.30)
+  Downloading httpcore-1.0.9-py3-none-any.whl.metadata (21 kB)
+Collecting idna (from httpx<1,>=0.23.0->huggingface-hub>=0.30)
+  Downloading idna-3.11-py3-none-any.whl.metadata (8.4 kB)
+Collecting h11>=0.16 (from httpcore==1.*->httpx<1,>=0.23.0->huggingface-hub>=0.30)
+  Downloading h11-0.16.0-py3-none-any.whl.metadata (8.3 kB)
+Collecting exceptiongroup>=1.0.2 (from anyio->httpx<1,>=0.23.0->huggingface-hub>=0.30)
+  Downloading exceptiongroup-1.3.0-py3-none-any.whl.metadata (6.7 kB)
+Collecting sniffio>=1.1 (from anyio->httpx<1,>=0.23.0->huggingface-hub>=0.30)
+  Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
+Collecting click>=8.0.0 (from typer-slim->huggingface-hub>=0.30)
+  Downloading click-8.3.0-py3-none-any.whl.metadata (2.6 kB)
+Downloading huggingface_hub-1.1.3-py3-none-any.whl (515 kB)
+Downloading hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
+   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.3/3.3 MB 164.6 MB/s  0:00:00
+Downloading httpx-0.28.1-py3-none-any.whl (73 kB)
+Downloading httpcore-1.0.9-py3-none-any.whl (78 kB)
+Downloading hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
+   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.6/3.6 MB 615.8 MB/s  0:00:00
+Downloading h11-0.16.0-py3-none-any.whl (37 kB)
+Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
+Downloading anyio-4.11.0-py3-none-any.whl (109 kB)
+Downloading exceptiongroup-1.3.0-py3-none-any.whl (16 kB)
+Downloading idna-3.11-py3-none-any.whl (71 kB)
+Downloading sniffio-1.3.1-py3-none-any.whl (10 kB)
+Downloading certifi-2025.11.12-py3-none-any.whl (159 kB)
+Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)
+Downloading typer_slim-0.20.0-py3-none-any.whl (47 kB)
+Downloading click-8.3.0-py3-none-any.whl (107 kB)
+Installing collected packages: tqdm, sniffio, shellingham, idna, hf-xet, hf-transfer, h11, exceptiongroup, click, certifi, typer-slim, httpcore, anyio, httpx, huggingface-hub
+Successfully installed anyio-4.11.0 certifi-2025.11.12 click-8.3.0 exceptiongroup-1.3.0 h11-0.16.0 hf-transfer-0.1.9 hf-xet-1.2.0 httpcore-1.0.9 httpx-0.28.1 huggingface-hub-1.1.3 idna-3.11 shellingham-1.5.4 sniffio-1.3.1 tqdm-4.67.1 typer-slim-0.20.0

README.md CHANGED Viewed

@@ -1,12 +1,73 @@
 ---
-title: OCR
-emoji: 🦀
-colorFrom: yellow
-colorTo: yellow
-sdk: gradio
-sdk_version: 5.49.1
-app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: "OCR Text Detection"
+emoji: "📄"
+colorFrom: "blue"
+colorTo: "green"
+sdk: "gradio"
+sdk_version: "4.0.0"
+app_file: "app.py"
 pinned: false
 ---
+# 📄 OCR Text Detection
+A general Optical Character Recognition (OCR) application built with Gradio and EasyOCR for extracting text from images with high accuracy.
+## Features
+- **Multiple Upload Options**
+  - Upload images directly from your device
+  - Load images from URL
+  - Try demo images with one click
+- **Smart Text Detection**
+  - Automatic text detection with bounding boxes
+  - Confidence scores for each detected text
+  - Adjustable confidence threshold filtering
+- **Visual Annotations**
+  - Green bounding boxes around detected text
+  - Confidence scores displayed on the image
+  - Easy-to-read formatted text output
+- **User-Friendly Interface**
+  - Clean and modern UI design
+  - Real-time processing
+  - Copy extracted text with one click
+  - Responsive layout
+## How to Use
+1. **Upload an Image**: Choose from three options:
+   - Click "Upload File" tab to upload from your device
+   - Click "Image by URL" tab to load from a web URL
+   - Click on any demo image to try it instantly
+2. **Adjust Confidence**: Use the confidence threshold slider (0.0 - 1.0) to hide detections whose confidence score is below the selected value
+3. **View Results**:
+   - See the processed image with bounding boxes
+   - Read the extracted text in the text box below
+   - Copy the text using the copy button
+## Technology Stack
+- **EasyOCR**: State-of-the-art OCR engine
+- **Gradio**: Interactive web interface
+- **OpenCV**: Image processing
+- **NumPy**: Numerical operations
+## Supported Image Formats
+- JPG/JPEG
+- PNG
+- BMP
+- TIFF/TIF
+## 🌐 Made by
+[Techtics.ai](https://techtics.ai) - AI Solutions for Business
+---
+*Extract text from invoices, receipts, documents, screenshots, and more!*

app.py ADDED Viewed

	@@ -0,0 +1,186 @@

+"""
+OCR Text Detection App
+======================
+Gradio app for OCR text extraction
+Features: File upload, URL upload, demo images, confidence filtering
+"""
+import cv2
+import easyocr
+import numpy as np
+from pathlib import Path
+import gradio as gr
+import urllib.request
+import tempfile
+import os
+import warnings
+warnings.filterwarnings('ignore')
+# Initialize the EasyOCR reader once at import time and reuse it for every
+# image: English only, GPU disabled (gpu=False), verbose output off.
+reader = easyocr.Reader(['en'], gpu=False, verbose=False)
+def format_text_aligned(results):
+    """Arrange OCR detections into reading order.
+
+    Detections are grouped into lines by the vertical centre of their bounding
+    box, and every line is ordered left to right by the box's minimum X.
+
+    Args:
+        results: Sequence of ``(bbox, text, confidence)`` tuples, where ``bbox``
+            is a sequence of ``(x, y)`` points. The confidence is ignored here.
+
+    Returns:
+        The texts of one line joined by single spaces and the lines joined by
+        newlines; ``""`` when ``results`` is empty.
+    """
+    if not results:
+        return ""
+    # One (y_center, x_min, text) triple per detection.
+    detections = [
+        (sum(p[1] for p in bbox) / len(bbox), min(p[0] for p in bbox), text)
+        for bbox, text, _ in results
+    ]
+    # Same-line tolerance: 30% of the vertical extent divided by the number of
+    # distinct integer Y centres (a rough estimate of the line spacing).
+    y_coords = [d[0] for d in detections]
+    y_threshold = (max(y_coords) - min(y_coords)) / len({int(y) for y in y_coords}) * 0.3
+    # Top to bottom first, then left to right.
+    detections.sort(key=lambda d: (d[0], d[1]))
+    lines, current_line = [], []
+    # Every line is anchored at the Y centre of its first detection.
+    current_y = detections[0][0]
+    for y, x, text in detections:
+        if abs(y - current_y) > y_threshold:
+            # Too far below the anchor: flush the finished line, start a new one.
+            if current_line:
+                lines.append(' '.join(t[1] for t in sorted(current_line, key=lambda item: item[0])))
+            current_line, current_y = [], y
+        current_line.append((x, text))
+    if current_line:
+        lines.append(' '.join(t[1] for t in sorted(current_line, key=lambda item: item[0])))
+    return '\n'.join(lines)
+def process_ocr(input_image, confidence_threshold=0.0):
+    """Run OCR on an RGB image and annotate what was found.
+
+    Args:
+        input_image: RGB image array, or ``None``.
+        confidence_threshold: Detections scoring below this value are dropped.
+
+    Returns:
+        ``(annotated_rgb_image, formatted_text)``; ``(None, "")`` when no image
+        is given.
+    """
+    if input_image is None:
+        return None, ""
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    green = (0, 255, 0)
+    # OpenCV drawing and the reader operate on BGR channel order.
+    image_bgr = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
+    detections = reader.readtext(image_bgr)
+    # Keep only detections at or above the requested confidence.
+    kept = [(box, word, score) for box, word, score in detections if score >= confidence_threshold]
+    formatted_text = format_text_aligned(kept)
+    canvas = image_bgr.copy()
+    for box, word, score in kept:
+        xs = [int(pt[0]) for pt in box]
+        ys = [int(pt[1]) for pt in box]
+        # Outline of the detected text region.
+        outline = np.array(list(zip(xs, ys)), dtype=np.int32)
+        cv2.polylines(canvas, [outline], isClosed=True, color=green, thickness=2)
+        left, top = min(xs), min(ys)
+        label = f"{word} ({score:.2f})"
+        (label_w, label_h), _ = cv2.getTextSize(label, font, 0.4, 1)
+        # Label goes above the box unless the box sits too close to the top edge.
+        if top > 20:
+            baseline_y = top - 5
+        else:
+            baseline_y = top + 20
+        # Filled background first, then the label text on top of it.
+        cv2.rectangle(canvas, (left - 2, baseline_y - label_h - 2), (left + label_w + 2, baseline_y + 2), green, -1)
+        cv2.putText(canvas, label, (left, baseline_y), font, 0.4, (0, 0, 0), 1)
+    annotated_rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
+    return annotated_rgb, (formatted_text if formatted_text else "")
+# Load up to three sample images for the demo gallery. The images/ directory is
+# optional: when it is missing the list is empty instead of the import failing.
+exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif')
+_images_dir = Path('images')
+sample_images = sorted(
+    str(f)
+    for f in (_images_dir.iterdir() if _images_dir.is_dir() else ())
+    if f.is_file() and f.suffix.lower() in exts
+)[:3]
+# Custom CSS handed to gr.Blocks below: caps and centres the container width,
+# hides scrollbars, and styles the h1 title plus the .description and .credits
+# blocks (including the credits link and its hover state).
+css = """
+.gradio-container {font-family: 'Segoe UI', sans-serif; max-width: 1400px; margin: 0 auto; overflow-x: hidden;}
+body, html {overflow-x: hidden; scrollbar-width: none;}
+::-webkit-scrollbar {display: none;}
+h1 {text-align: center; color: #042AFF; margin-bottom: 1rem; font-size: 2.5rem; font-weight: bold; letter-spacing: -0.5px;}
+.description {text-align: center; color: #6b7280; margin-bottom: 0.3rem; font-size: 1.05rem; line-height: 1.6;}
+.credits {text-align: center; color: #f2faf4; margin-bottom: 2rem; margin-top: 0; font-size: 1rem;}
+.credits a {color: #042AFF; text-decoration: none; font-weight: bold; transition: color 0.3s ease;}
+.credits a:hover {color: #111F68; text-decoration: underline;}
+"""
+# Create Gradio interface: header, two-column main row (inputs | result), and a
+# full-width text box for the extracted text
+with gr.Blocks(title="OCR Text Detection", theme=gr.themes.Soft(), css=css) as demo:
+    gr.Markdown("# 📄 OCR Text Detection")
+    gr.Markdown("<div class='description'>Extract text from images with bounding boxes and confidence scores. Upload an image or select a demo image to get started.</div>", elem_classes=["description"])
+    gr.Markdown("<div class='credits' style='text-align: center;'>Made by <a href='https://techtics.ai' target='_blank' style='color: #042AFF; text-decoration: none; font-weight: bold;'>Techtics.ai</a></div>", elem_classes=["credits"])
+    # Main layout: Two columns
+    with gr.Row():
+        # Column 1: Upload area with tabs
+        with gr.Column(scale=1):
+            with gr.Tabs():
+                with gr.Tab("Upload File"):
+                    image_input = gr.Image(label="Upload Image", type="numpy", height=400)
+                with gr.Tab("Image by URL"):
+                    url_input = gr.Textbox(label="Image URL", placeholder="Enter image URL (jpg, png, etc.)", lines=1)
+                    url_btn = gr.Button("Load Image from URL", variant="primary")
+            # Demo images gallery (demo_gallery only exists when sample images were found)
+            if sample_images:
+                gr.Markdown("### Demo Images (Click to load)")
+                demo_gallery = gr.Gallery(value=sample_images, columns=3, rows=1, height="40px", show_label=False, container=False, allow_preview=False, object_fit="contain")
+        # Column 2: Processed image and confidence slider
+        with gr.Column(scale=1):
+            gr.Markdown("### Processed Image")
+            annotated_output = gr.Image(label="", type="numpy", height=400, visible=True)
+            confidence_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.05, label="Confidence Threshold", info="Filter detections by minimum confidence score")
+    # Text output below both columns (full width, hidden until processing)
+    text_output = gr.Textbox(label="Extracted Text", value="", placeholder="Extracted text will appear here after processing...", lines=12, interactive=True, show_copy_button=True, visible=False)
+    # Load image from URL
+    def load_from_url(url):
+        """Download an image over HTTP(S) and return it as an RGB array.
+
+        Args:
+            url: Address typed by the user; surrounding whitespace is ignored.
+
+        Returns:
+            The decoded image as an RGB array, or ``None`` when the URL is
+            empty, its scheme is not http/https, the download fails, the
+            payload exceeds the size cap, or the bytes are not a decodable
+            image.
+        """
+        from urllib.parse import urlsplit
+        max_bytes = 20 * 1024 * 1024  # refuse payloads larger than 20 MiB
+        if not url or not url.strip():
+            return None
+        url = url.strip()
+        try:
+            # urllib would also open file:// and ftp:// URLs; this input is
+            # untrusted, so only plain web URLs are accepted.
+            # NOTE(review): hosts are not restricted, so internal addresses
+            # remain reachable (SSRF) - confirm whether that matters here.
+            if urlsplit(url).scheme.lower() not in ('http', 'https'):
+                return None
+            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
+            with urllib.request.urlopen(req, timeout=10) as response:
+                # Read one byte past the cap so oversized payloads are detectable.
+                data = response.read(max_bytes + 1)
+            if not data or len(data) > max_bytes:
+                return None
+            # Decode straight from memory: no temporary file left to clean up.
+            img = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_COLOR)
+            return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) if img is not None else None
+        except Exception:
+            # Deliberate best effort: any network or decode failure means "no image".
+            return None
+    # Load demo image from gallery
+    def load_from_gallery(evt: gr.SelectData):
+        """Return the clicked gallery sample as an RGB array, or ``None``.
+
+        ``None`` is returned when the selected index is outside
+        ``sample_images`` or the file cannot be read by OpenCV.
+        """
+        selected = evt.index
+        if not (selected < len(sample_images)):
+            return None
+        bgr = cv2.imread(sample_images[selected])
+        if bgr is None:
+            return None
+        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+    # Event handlers: clicking the button or submitting the URL textbox both
+    # run load_from_url and put the result into image_input
+    url_btn.click(fn=load_from_url, inputs=url_input, outputs=image_input)
+    url_input.submit(fn=load_from_url, inputs=url_input, outputs=image_input)
+    # The gallery is only created when sample images exist, so wire it conditionally
+    if sample_images:
+        demo_gallery.select(fn=load_from_gallery, outputs=image_input)
+    # Process image when it changes or confidence slider changes
+    def on_change(img, conf_thresh):
+        """Build the updates for the annotated image and the text box.
+
+        With no image, the annotated image is cleared and the text box is
+        hidden; otherwise OCR runs and both components are shown with results.
+        """
+        if img is not None:
+            annot, text = process_ocr(img, conf_thresh)
+            image_update = gr.update(visible=True, value=annot)
+            text_update = gr.update(visible=True, value=text or "")
+            return image_update, text_update
+        return gr.update(visible=True, value=None), gr.update(visible=False, value="")
+    # Both triggers call on_change with the current image and threshold and
+    # update the annotated image and the text box
+    image_input.change(fn=on_change, inputs=[image_input, confidence_slider], outputs=[annotated_output, text_output])
+    confidence_slider.change(fn=on_change, inputs=[image_input, confidence_slider], outputs=[annotated_output, text_output])
+if __name__ == "__main__":
+    # Serves on all interfaces, port 7860; locally open http://localhost:7860/
+    # Share links are not supported on Hugging Face Spaces, so only request one
+    # when the SPACE_ID environment variable (set by Spaces) is absent.
+    demo.launch(share=os.getenv("SPACE_ID") is None, server_name="0.0.0.0", server_port=7860)

images/card.jpg ADDED Viewed

Git LFS Details

SHA256: eb54b2696c670673e3eb3b76e5d5e54a51697d5d048a3b0f931d15f5ad2977c8
Pointer size: 130 Bytes
Size of remote file: 94.3 kB

images/demo.png ADDED Viewed

Git LFS Details

SHA256: 7cd6fa9d5cee76e602f278e68e7312c56a05913694ff6432a7fdeb44c994c4e6
Pointer size: 132 Bytes
Size of remote file: 1.06 MB

images/google.png ADDED Viewed

Git LFS Details

SHA256: 30c405ba19d7879ccda7b27be7dae2b842f057fabeafedd24c4c5e2514fa28fc
Pointer size: 131 Bytes
Size of remote file: 763 kB

requirements.txt ADDED Viewed

Binary file (64 Bytes). View file