Spaces:

mlopez6132
/

textsense-ocr

Running

textsense-ocr / app.py

Marc Allen Lopez

Implement PP-OCRv5 using official model names

edb3860 3 months ago

4.58 kB

	import os
	import io
	from typing import Optional

	import numpy as np
	from fastapi import FastAPI, UploadFile, File, Form
	from fastapi.responses import JSONResponse
	from PIL import Image
	import requests
	from paddleocr import PaddleOCR


	# Language code handed to PaddleOCR; override with the OCR_LANG env var.
	OCR_LANG = os.environ.get("OCR_LANG", "en")

	# Directory exported to the process as PPOCR_HOME and created eagerly.
	# NOTE(review): presumably where PaddleOCR caches downloaded models - confirm.
	PPOCR_HOME = os.environ.get("PPOCR_HOME", "/tmp/.paddleocr")
	os.makedirs(PPOCR_HOME, exist_ok=True)
	if "PPOCR_HOME" not in os.environ:
	    os.environ["PPOCR_HOME"] = PPOCR_HOME

	# PP-OCRv5 model configuration: only the exact (case-insensitive) value
	# "true" enables it; anything else selects the default models.
	USE_PP_OCRV5 = "true" == os.environ.get("USE_PP_OCRV5", "true").lower()
	# Placeholder label; overwritten by load_ocr() during OCR initialization.
	ACTIVE_OCR_VERSION = "unknown"


	def load_ocr():
	    """Construct the PaddleOCR engine, trying progressively simpler configurations.

	    Candidate keyword sets are attempted in order and the first one that
	    constructs successfully wins. ``ACTIVE_OCR_VERSION`` is set to the label
	    of the winning candidate:

	    * ``"PP-OCRv5"`` - a configuration naming the PP-OCRv5 server models.
	    * ``"default"`` - the default models, when PP-OCRv5 is disabled.
	    * ``"default-fallback"`` - default models reached only after an earlier
	      candidate failed.

	    Returns:
	        The constructed ``PaddleOCR`` instance.

	    Raises:
	        Exception: the error raised by the last candidate, if every
	            candidate fails to construct.
	    """
	    global ACTIVE_OCR_VERSION

	    # Arguments used by the original default/fallback constructor call.
	    legacy_defaults = {"use_angle_cls": True, "lang": OCR_LANG, "show_log": False}

	    candidates = []
	    if USE_PP_OCRV5:
	        # Use PP-OCRv5 models as specified in the official documentation.
	        v5_options = {
	            "lang": OCR_LANG,
	            "text_detection_model_name": "PP-OCRv5_server_det",
	            "text_recognition_model_name": "PP-OCRv5_server_rec",
	            "use_doc_orientation_classify": False,
	            "use_doc_unwarping": False,
	            "use_textline_orientation": True,
	        }
	        # First the exact argument set this service has always passed.
	        candidates.append(
	            ("PP-OCRv5", {"use_angle_cls": True, "show_log": False, **v5_options})
	        )
	        # NOTE(review): PaddleOCR 3.x reportedly rejects `show_log` and treats
	        # `use_angle_cls` as a deprecated alias of `use_textline_orientation`,
	        # so retry without the legacy names - confirm against the pinned version.
	        candidates.append(("PP-OCRv5", v5_options))
	        candidates.append(("default-fallback", legacy_defaults))
	    else:
	        candidates.append(("default", legacy_defaults))
	    # Last resort: rely on the library defaults for everything but language,
	    # instead of repeating a call that has already failed.
	    candidates.append(("default-fallback", {"lang": OCR_LANG}))

	    last_error = None
	    for label, kwargs in candidates:
	        try:
	            engine = PaddleOCR(**kwargs)
	        except Exception as exc:  # constructor failure modes vary by version
	            print(
	                f"PaddleOCR initialization failed for '{label}' configuration: "
	                f"{exc}. Trying next configuration."
	            )
	            last_error = exc
	            continue
	        ACTIVE_OCR_VERSION = label
	        return engine

	    # Every candidate failed; surface the most recent error to the caller.
	    raise last_error


	# Build the shared OCR engine once, at import time. load_ocr() also updates
	# the module-level ACTIVE_OCR_VERSION label as a side effect.
	ocr = load_ocr()

	# Must run after load_ocr(): the title interpolates the label it just set.
	app = FastAPI(title=f"TextSense OCR (PaddleOCR {ACTIVE_OCR_VERSION})")


	def read_image_from_upload(upload: UploadFile) -> Image.Image:
	    """Decode an uploaded file into an RGB PIL image.

	    Reads everything remaining in ``upload.file`` with a synchronous
	    ``read()`` and opens the bytes with Pillow.

	    Args:
	        upload: The uploaded file whose underlying file object is read.

	    Returns:
	        The decoded image converted to RGB mode.
	    """
	    payload = io.BytesIO(upload.file.read())
	    return Image.open(payload).convert("RGB")  # type: ignore


	def read_image_from_url(url: str) -> Image.Image:
	    """Download an image over HTTP and decode it into an RGB PIL image.

	    Args:
	        url: Address passed to ``requests.get`` with a 20 second timeout.

	    Returns:
	        The decoded image converted to RGB mode.

	    Raises:
	        requests.HTTPError: If ``raise_for_status()`` rejects the response.
	    """
	    response = requests.get(url, timeout=20)
	    response.raise_for_status()
	    body = io.BytesIO(response.content)
	    return Image.open(body).convert("RGB")  # type: ignore


	# Recognised lines scoring below this confidence are dropped from the output.
	_MIN_OCR_SCORE = 0.5


	def _confident_lines(result, min_score=_MIN_OCR_SCORE):
	    """Return the recognised text lines of the first image in ``result``.

	    The indexing assumes one item per image, each a sequence of entries in
	    which ``entry[1]`` is a ``(text, score)`` pair. Entries that do not fit
	    that shape are skipped rather than failing the whole request.
	    """
	    if not (result and isinstance(result, list)):
	        return []
	    kept = []
	    # result has one item per image; only the first (single image) is used.
	    for entry in result[0] or []:
	        try:
	            text = entry[1][0]
	            score = float(entry[1][1])
	        except (IndexError, KeyError, TypeError, ValueError):
	            # Malformed entry: best-effort skip.
	            continue
	        if text and score >= min_score:
	            kept.append(text)
	    return kept


	@app.post("/extract")
	async def extract(
	    image: Optional[UploadFile] = File(None),
	    image_url: Optional[str] = Form(None),
	):
	    """Run OCR on an uploaded image or on an image fetched from a URL.

	    Args:
	        image: Optional multipart file upload; used when it has a filename.
	        image_url: Optional URL of an image, used when no file is uploaded.

	    Returns:
	        ``{"text": ...}`` with the confident lines joined by newlines, or
	        ``{"error": ...}`` with status 400 for input/fetch/decode problems
	        and status 500 for any other failure.
	    """
	    try:
	        if image is not None and image.filename:
	            # Starlette's UploadFile is async; ensure we read content properly
	            raw = await image.read()
	        elif image_url:
	            url = image_url.strip()
	            if not url:
	                return JSONResponse({"error": "image_url is empty"}, status_code=400)
	            # NOTE(review): the URL is client-supplied and fetched from the
	            # server with a synchronous call inside an async handler; consider
	            # host restrictions and a worker thread - confirm deployment needs.
	            try:
	                r = requests.get(url, timeout=20, headers={'User-Agent': 'TextSense-OCR/1.0'})
	                r.raise_for_status()
	            except requests.exceptions.ConnectionError as ce:
	                return JSONResponse({
	                    "error": f"Network connection failed: {str(ce)}. The Space may have limited network access."
	                }, status_code=400)
	            except requests.exceptions.Timeout:
	                return JSONResponse({"error": "Request timed out while fetching image"}, status_code=400)
	            except requests.exceptions.RequestException as he:
	                # Covers HTTP error statuses as well as malformed URLs,
	                # unsupported schemes and redirect loops: all caller errors.
	                return JSONResponse({"error": f"Failed to fetch image: {str(he)}"}, status_code=400)
	            raw = r.content
	        else:
	            return JSONResponse({"error": "No image provided. Provide 'image' file or 'image_url'."}, status_code=400)

	        try:
	            img = Image.open(io.BytesIO(raw)).convert("RGB")  # type: ignore
	        except OSError as ie:
	            # Pillow reports unidentifiable or truncated data as OSError;
	            # that is bad input from the client, not a server fault.
	            return JSONResponse({"error": f"Invalid or unsupported image data: {str(ie)}"}, status_code=400)

	        # Run PaddleOCR on the image
	        # NOTE(review): PaddleOCR 3.x reportedly drops the `cls` keyword and
	        # returns dict-like results - confirm against the pinned version.
	        np_img = np.array(img)
	        result = ocr.ocr(np_img, cls=True)
	        extracted = "\n".join(_confident_lines(result)).strip()
	        return JSONResponse({"text": extracted})
	    except Exception as e:
	        # Top-level boundary: report anything unexpected as a server error.
	        return JSONResponse({"error": f"OCR error: {str(e)}"}, status_code=500)


	@app.get("/healthz")
	async def healthz():
	    """Report liveness together with the OCR language and active model label."""
	    status = dict(ok=True, lang=OCR_LANG, ocr_version=ACTIVE_OCR_VERSION)
	    return status