from pathlib import Path
from pdf2image import convert_from_path, pdfinfo_from_path
from google.cloud import vision
from google.oauth2 import service_account
from utils.get_layout import Prediction
import json
import sys
import time
from dotenv import load_dotenv
import os
# Populate the process environment from a .env file (python-dotenv) before
# the os.getenv() lookup below, so values defined there are visible to it.
load_dotenv()

# --- CONFIGURATION ---
# PDF_PATH = "QuantitativeAptitudeVOL1.pdf"
# OUTPUT_JSON = Path("public/output_files/ocr_output.json")
# Service-account key file passed to Credentials.from_service_account_file below.
# NOTE(review): this path is under "public/" -- confirm that directory is never
# served statically or committed, since the file holds credentials.
GCP_CREDENTIALS_JSON = "public/credentials/isentropic-card-468114-f6-1ee415ed3a11.json"
# Read from the POPPLER_PATH environment variable; os.getenv returns None when
# the variable is unset.
POPPLER_PATH =  os.getenv("POPPLER_PATH")  # Adjust as needed

# Initialize Google Vision client
# This runs at import time. Any exception raised while loading the key file or
# constructing the client is printed and the process exits with status 1, so
# importing this module terminates the interpreter when initialization fails.
try:
    credentials = service_account.Credentials.from_service_account_file(
        GCP_CREDENTIALS_JSON
    )
    # Module-level client reused by ocr_page_image for every request.
    vision_client = vision.ImageAnnotatorClient(credentials=credentials)
except Exception as e:
    print(f"Failed to initialize Vision client: {e}")
    sys.exit(1)

# Helper: run Google Vision document OCR on one PIL image
def ocr_page_image(pil_image) -> str:
    """Return the text Google Vision detects in ``pil_image``.

    The image is encoded as PNG in memory and submitted to the module-level
    ``vision_client`` through ``document_text_detection``.

    Args:
        pil_image: Image object exposing ``save(fp, format=...)``
            (a PIL image, per the function name).

    Returns:
        The ``text`` attribute of the response's ``full_text_annotation``.

    Raises:
        RuntimeError: If the response's ``error.message`` is non-empty.
    """
    import io

    # Serialize to PNG bytes in memory; nothing is written to disk.
    with io.BytesIO() as png_stream:
        pil_image.save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()

    ocr_response = vision_client.document_text_detection(
        image=vision.Image(content=png_bytes)
    )

    # A populated error message on the response is surfaced as an exception
    # so callers never receive text from a failed request.
    failure = ocr_response.error.message
    if failure:
        raise RuntimeError(f"OCR error: {failure}")

    return ocr_response.full_text_annotation.text


# progress_store = {}

# def extract_text_from_pdf(pdf_path, task_id=None):
#     pdfinfo_from_path(pdf_path, userpw=None, poppler_path=POPPLER_PATH)
#     pages = convert_from_path(pdf_path, dpi=150, poppler_path=POPPLER_PATH)
#     results = []
#     detector = Prediction(
#         model_path="layout_detector_model.pt",
#         output_dir="crops_out",
#         conf_thresh=0.9,
#         allowed_labels=["Figure", "Table"]
#     )

#     total_pages = len(pages)
#     for i, page_img in enumerate(pages, start=1):
#         text = ocr_page_image(page_img)
#         page_img, crops = detector.generate(page_img, img_name=f"page_{i:03d}")
#         results.append({"page": i, "text": text, "figures": crops})

#         # Update global progress store
#         percent = round((i / total_pages) * 100, 2)
#         progress_store[task_id] = {
#             "page": i,
#             "total": total_pages,
#             "percent": percent,
#             "status": "processing"
#         }
#         time.sleep(0.5)  # simulate delay so frontend sees gradual updates

#     # Final update
#     progress_store[task_id] = {
#         "page": total_pages,
#         "total": total_pages,
#         "percent": 100,
#         "status": "done"
#     }
#     return results, progress_store

# if __name__ == "__main__":
#     data = extract_text_from_pdf(PDF_PATH)
#     with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
#         json.dump(data, f, ensure_ascii=False, indent=2)
#     print(f"Saved OCR results to {OUTPUT_JSON}")