
    hn                     $   U d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dl	Z	d dl	Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ erd d	lmZ  ej.                  e      Z G d
 de      Z G d de      Z G d de      ZdZeed<    G d de      Z y)    N)abstractmethod)TYPE_CHECKINGIterableIterator)Document)BaseChatModel)HumanMessage)BaseBlobParser)BlobImagec                   @    e Zd ZdZedddefd       Zdedee	   fdZ
y)	BaseImageBlobParserz6Abstract base class for parsing image blobs into text.imgr   returnc                      y)zAbstract method to analyze an image and extract textual content.

        Args:
            img: The image to be analyzed.

        Returns:
          The extracted text content.
        N )selfr   s     q/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_loaders/parsers/images.py_analyze_imagez"BaseImageBlobParser._analyze_image   s        blobc              #   v  K   	 ddl m} |j                         5 }|j                  dk(  rqt        j                  |      }|j                  dk(  r;|j                  d   dk(  r)|j                  t        j                  |d      d	
      }n#|j                  |      }n|j                  |      }| j                  |      }t        j                  d|j                  dd             t!        |i |j"                  d|j$                  i       ddd       y# t        $ r t        d      w xY w# 1 sw Y   yxY ww)zLazily parse a blob and yields Documents containing the parsed content.

        Args:
            blob (Blob): The blob to be parsed.

        Yields:
            Document:
              A document containing the parsed content and metadata.
        r   r   zG`Pillow` package not found, please install it with `pip install Pillow`zapplication/x-npy         )axisL)modezImage text: %s
z\nsource)page_contentmetadataN)PILr   ImportErroras_bytes_iomimetypenumpyloadndimshape	fromarraysqueezeopenr   loggerdebugreplacer   r#   r!   )r   r   Imgbufarrayr   contents          r   
lazy_parsezBaseImageBlobParser.lazy_parse$   s    	(  	3}} 33

3::?u{{1~':--e!(D3-OC--.Chhsm))#.GLL)7??4+GH$EDMMEh-DE 	 	  	' 		 	s3   D9D D9C2D-	D9D**D9-D62D9N)__name__
__module____qualname____doc__r   strr   r   r   r   r6   r   r   r   r   r      s=    @' c   t  (:  r   r   c                   8     e Zd ZdZ	 	 d fdZdddefdZ xZS )RapidOCRBlobParserzParser for extracting text from images using the RapidOCR library.

    Attributes:
        ocr:
          The RapidOCR instance for performing OCR.
    r   c                 0    t         |           d| _        y)z5
        Initializes the RapidOCRBlobParser.
        N)super__init__ocr)r   	__class__s    r   r@   zRapidOCRBlobParser.__init__O   s     	r   r   r   c                 4   | j                   s	 ddlm}  |       | _         | j                  t	        j
                  |            \  }}d}|r0dj                  |D cg c]  }|d   	 c}      j                         }|S # t        $ r t        d      w xY wc c}w )z
        Analyzes an image and extracts text using RapidOCR.

        Args:
            img (Image):
              The image to be analyzed.

        Returns:
            str:
              The extracted text content.
        r   )RapidOCRzc`rapidocr-onnxruntime` package not found, please install it with `pip install rapidocr-onnxruntime` r    r   )rA   rapidocr_onnxruntimerD   r%   npr4   joinstrip)r   r   rD   
ocr_result_r5   texts          r   r   z!RapidOCRBlobParser._analyze_imageX   s     xx9#: #/
Ayyj!Ad$q'!ABIIKG  !9  "Bs   A= B=B)r   N)r7   r8   r9   r:   r@   r;   r   __classcell__rB   s   @r   r=   r=   G   s(    	' c r   r=   c                   D     e Zd ZdZdddee   f fdZdddefd	Z xZS )
TesseractBlobParserzFParse for extracting text from images using the Tesseract OCR library.)eng)langsrR   c                B    t         |           t        |      | _        y)zInitialize the TesseractBlobParser.

        Args:
            langs (list[str]):
              The languages to use for OCR.
        N)r?   r@   listrR   )r   rR   rB   s     r   r@   zTesseractBlobParser.__init__x   s     	%[
r   r   r   r   c                     	 ddl }|j                  |dj                  | j                              j                         S # t        $ r t        d      w xY w)zAnalyze an image and extracts text using Tesseract OCR.

        Args:
            img: The image to be analyzed.

        Returns:
            str: The extracted text content.
        r   NzQ`pytesseract` package not found, please install it with `pip install pytesseract`+)lang)pytesseractr%   image_to_stringrH   rR   rI   )r   r   rX   s      r   r   z"TesseractBlobParser._analyze_image   s[    	 **3SXXdjj5I*JPPRR  	, 	s   A   A)	r7   r8   r9   r:   r   r;   r@   r   rM   rN   s   @r   rP   rP   u   s4    P
  (! }!S' Sc Sr   rP   a  You are an assistant tasked with summarizing images for retrieval. 1. These summaries will be embedded and used to retrieve the raw image. Give a concise summary of the image that is well optimized for retrieval
2. extract all the text from the image. Do not exclude any content from the page.
Format answer in markdown without explanatory text and without markdown delimiter ``` at the beginning. _PROMPT_IMAGES_TO_DESCRIPTIONc                   B     e Zd ZdZeddedef fdZdddefd	Z xZ	S )
LLMImageBlobParserzParser for analyzing images using a language model (LLM).

    Attributes:
        model (BaseChatModel):
          The language model to use for analysis.
        prompt (str):
          The prompt to provide to the language model.
    )promptmodelr]   c                >    t         |           || _        || _        y)zInitializes the LLMImageBlobParser.

        Args:
            model (BaseChatModel):
              The language model to use for analysis.
            prompt (str):
              The prompt to provide to the language model.
        N)r?   r@   r^   r]   )r   r^   r]   rB   s      r   r@   zLLMImageBlobParser.__init__   s     	
r   r   r   r   c           	         t        j                         }|j                  |d       t        j                  |j                               j                  d      }| j                  j                  t        d| j                  j                  t              dddd| id	g
      g      }|j                  }t        |t              sJ |S )zAnalyze an image using the provided language model.

        Args:
            img: The image to be analyzed.

        Returns:
            The extracted textual content.
        PNG)formatzutf-8rL   )typerL   	image_urlurlzdata:image/jpeg;base64,)rc   rd   )r5   )ioBytesIOsavebase64	b64encodegetvaluedecoder^   invoker	   r]   rb   r5   
isinstancer;   )r   r   image_bytes
img_base64msgresults         r   r   z!LLMImageBlobParser._analyze_image   s     jjlU+%%k&:&:&<=DDWM
jj %+$(KK$6$6f$6$E
 %0 %)@'M*
$ &#&&&r   )
r7   r8   r9   r:   rZ   r   r;   r@   r   rM   rN   s   @r   r\   r\      s9     4	  	$ '  c  r   r\   )!ri   rf   loggingabcr   typingr   r   r   r(   rG   langchain_core.documentsr   langchain_core.language_modelsr   langchain_core.messagesr	   )langchain_community.document_loaders.baser
   1langchain_community.document_loaders.blob_loadersr   	PIL.Imager   	getLoggerr7   r/   r   r=   rP   rZ   r;   __annotations__r\   r   r   r   <module>r~      s     	   4 4   - 8 0 D B			8	$.. .b+, +\!S- !SJ< s <, <r   