import os
import io
from typing import Optional

import numpy as np
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse
from PIL import Image
import requests
from paddleocr import PaddleOCR


# Language code handed to PaddleOCR; override with the OCR_LANG env var.
OCR_LANG = os.environ.get("OCR_LANG", "en")

# Working directory exported to PaddleOCR through the PPOCR_HOME env var
# (presumably its model cache -- confirm against the PaddleOCR docs).
# The directory is created eagerly, and the variable is only exported when
# the environment does not already define it.
PPOCR_HOME = os.environ.get("PPOCR_HOME", "/tmp/.paddleocr")
os.makedirs(PPOCR_HOME, exist_ok=True)
if "PPOCR_HOME" not in os.environ:
    os.environ["PPOCR_HOME"] = PPOCR_HOME

# PP-OCRv5 toggle: enabled only when USE_PP_OCRV5 equals "true" (any casing).
USE_PP_OCRV5 = os.environ.get("USE_PP_OCRV5", "true").lower() == "true"

# Placeholder label; overwritten by load_ocr() once an engine has been built.
ACTIVE_OCR_VERSION = "unknown"


def _new_default_ocr():
    """Build a PaddleOCR engine using the library's default models for OCR_LANG."""
    return PaddleOCR(use_angle_cls=True, lang=OCR_LANG, show_log=False)


def load_ocr():
    """Create the PaddleOCR engine and record which variant is active.

    When ``USE_PP_OCRV5`` is true, the PP-OCRv5 server detection/recognition
    models are requested first; if that constructor raises, the default
    models are used instead. When ``USE_PP_OCRV5`` is false, the default
    models are built directly, with a single retry if the first attempt
    raises.

    Side effects:
        Sets the module-level ``ACTIVE_OCR_VERSION`` to ``"PP-OCRv5"``,
        ``"default"`` or ``"default-fallback"``. The last value is used only
        when PP-OCRv5 was actually attempted and failed.

    Returns:
        The constructed ``PaddleOCR`` instance.

    Raises:
        Exception: whatever the final default-model construction raises.

    NOTE(review): the PP-OCRv5 call passes both ``use_angle_cls`` and
    ``use_textline_orientation`` together with ``show_log``. Some PaddleOCR
    releases may reject that combination, and others may ignore the
    model-name arguments (in which case ACTIVE_OCR_VERSION would still read
    "PP-OCRv5") -- confirm against the installed PaddleOCR version.
    """
    global ACTIVE_OCR_VERSION

    if USE_PP_OCRV5:
        try:
            # Use PP-OCRv5 models as specified in the official documentation
            engine = PaddleOCR(
                use_angle_cls=True,
                lang=OCR_LANG,
                text_detection_model_name="PP-OCRv5_server_det",
                text_recognition_model_name="PP-OCRv5_server_rec",
                use_doc_orientation_classify=False,
                use_doc_unwarping=False,
                use_textline_orientation=True,
                show_log=False
            )
        except Exception as e:
            # Final fallback for any PP-OCRv5 initialization errors
            print(f"PP-OCRv5 initialization failed: {e}. Falling back to default models.")
            engine = _new_default_ocr()
            ACTIVE_OCR_VERSION = "default-fallback"
        else:
            ACTIVE_OCR_VERSION = "PP-OCRv5"
        return engine

    # PP-OCRv5 disabled: go straight to the default models. One retry is
    # kept so that a first-attempt failure does not immediately abort
    # startup, but it is reported as what it is, not as a PP-OCRv5 failure.
    try:
        engine = _new_default_ocr()
    except Exception as e:
        print(f"Default OCR initialization failed: {e}. Retrying once with default models.")
        engine = _new_default_ocr()
    ACTIVE_OCR_VERSION = "default"
    return engine


# Build the OCR engine once, at import time. load_ocr() also sets
# ACTIVE_OCR_VERSION as a side effect, so it must run before the app below.
ocr = load_ocr()

# The title is formatted after load_ocr() so it shows the active OCR variant.
app = FastAPI(title=f"TextSense OCR (PaddleOCR {ACTIVE_OCR_VERSION})")


def read_image_from_upload(upload: UploadFile) -> Image.Image:
    """Decode an uploaded file into an RGB PIL image.

    Reads the upload's underlying file object synchronously from its current
    position to the end, then lets Pillow decode the bytes.

    Args:
        upload: The uploaded file; only its ``file`` attribute is used.

    Returns:
        The decoded image converted to RGB mode.
    """
    payload = io.BytesIO(upload.file.read())
    return Image.open(payload).convert("RGB")  # type: ignore


def read_image_from_url(url: str) -> Image.Image:
    """Download an image over HTTP(S) and decode it into an RGB PIL image.

    The request uses a 20 second timeout and the same ``User-Agent`` header
    that the ``/extract`` endpoint sends, so both code paths are treated
    alike by remote hosts.

    Args:
        url: Address of the image to download.

    Returns:
        The decoded image converted to RGB mode.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (raised by ``raise_for_status``).
        requests.RequestException: for other request failures raised by
            ``requests.get`` (connection errors, timeouts, bad URLs).
    """
    # NOTE(security): if ``url`` can come from an untrusted client, this is
    # a server-side fetch of an arbitrary address -- callers should
    # validate or restrict the target host before calling.
    resp = requests.get(
        url,
        timeout=20,
        headers={"User-Agent": "TextSense-OCR/1.0"},
    )
    resp.raise_for_status()
    img = Image.open(io.BytesIO(resp.content))  # type: ignore
    return img.convert("RGB")


def _texts_above_threshold(result, min_score=0.5):
    """Collect recognized strings from the value returned by ``ocr.ocr``.

    Each entry of ``result[0]`` is read as ``entry[1][0]`` (text) and
    ``entry[1][1]`` (confidence); entries without that shape are skipped.

    Args:
        result: Return value of ``ocr.ocr`` for a single image.
        min_score: Minimum confidence (inclusive) for a line to be kept.

    Returns:
        The kept text strings, in the order they appear in ``result[0]``.
    """
    if not result or not isinstance(result, list):
        return []
    kept = []
    # result holds one item per image; only the first (single image) is used.
    # That item may be falsy (e.g. None), hence the ``or []``.
    for entry in result[0] or []:
        try:
            text = entry[1][0]
            score = float(entry[1][1])
        except (IndexError, KeyError, TypeError, ValueError):
            # Malformed entry: skip it rather than fail the whole request.
            continue
        if text and score >= min_score:
            kept.append(text)
    return kept


@app.post("/extract")
async def extract(
    image: Optional[UploadFile] = File(None),
    image_url: Optional[str] = Form(None),
):
    """Run OCR on an uploaded image or on an image fetched from a URL.

    Provide either the ``image`` file field or the ``image_url`` form field;
    the file takes precedence when both are present. Responds with
    ``{"text": ...}`` containing the recognized lines (confidence >= 0.5)
    joined by newlines. Missing input, download failures and undecodable
    image data yield a 400 ``{"error": ...}`` response; unexpected failures
    yield a 500.
    """
    try:
        if image is not None and image.filename:
            # Starlette's UploadFile is async; ensure we read content properly
            raw = await image.read()
        elif image_url:
            url = image_url.strip()
            if not url:
                return JSONResponse({"error": "image_url is empty"}, status_code=400)
            # NOTE(security): the URL is client-supplied and fetched from the
            # server; consider restricting target hosts if this is exposed.
            # NOTE(review): requests.get blocks the event loop for up to the
            # timeout while this coroutine runs.
            try:
                r = requests.get(url, timeout=20, headers={'User-Agent': 'TextSense-OCR/1.0'})
                r.raise_for_status()
                raw = r.content
            except requests.exceptions.ConnectionError as ce:
                return JSONResponse({
                    "error": f"Network connection failed: {str(ce)}. The Space may have limited network access."
                }, status_code=400)
            except requests.exceptions.Timeout:
                return JSONResponse({"error": "Request timed out while fetching image"}, status_code=400)
            except requests.HTTPError as he:
                return JSONResponse({"error": f"Failed to fetch image: {str(he)}"}, status_code=400)
            except requests.exceptions.RequestException as rexc:
                # Malformed URL, missing/unsupported schema, too many
                # redirects, ...: the request itself is at fault, not OCR.
                return JSONResponse({"error": f"Could not fetch image_url: {str(rexc)}"}, status_code=400)
        else:
            return JSONResponse({"error": "No image provided. Provide 'image' file or 'image_url'."}, status_code=400)

        # Decode separately from the download so that corrupt, empty or
        # oversized image data is reported as a client error instead of
        # falling through to the generic 500 handler below.
        try:
            img = Image.open(io.BytesIO(raw)).convert("RGB")  # type: ignore
        except (OSError, Image.DecompressionBombError) as ie:
            return JSONResponse({"error": f"Could not decode image: {str(ie)}"}, status_code=400)

        # Run PaddleOCR on the image
        # NOTE(review): the array is in RGB channel order; confirm whether
        # the installed PaddleOCR expects BGR for ndarray input.
        np_img = np.array(img)
        result = ocr.ocr(np_img, cls=True)
        extracted = "\n".join(_texts_above_threshold(result)).strip()
        return JSONResponse({"text": extracted})
    except Exception as e:
        return JSONResponse({"error": f"OCR error: {str(e)}"}, status_code=500)


@app.get("/healthz")
async def healthz():
    """Health probe reporting the configured language and active OCR variant."""
    status = {"ok": True}
    status["lang"] = OCR_LANG
    status["ocr_version"] = ACTIVE_OCR_VERSION
    return status