
    hj"                     r    d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ  G d de      Zy)	    N)PathLike)Path)AnyCallableDictIteratorOptionalUnion)Document)
BaseLoaderc                       e Zd ZdZ	 	 	 	 	 ddeeef   dedee   dee   dee	e
e
ge
f      ded	efd
Zdee   fdZdededee   fdZdedefdZde
eef   dede
eef   fdZdeddfdZy)
JSONLoadera  
    Load a `JSON` file using a `jq` schema.

    Setup:
        .. code-block:: bash

            pip install -U jq

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import JSONLoader
            import json
            from pathlib import Path

            file_path='./sample_quiz.json'
            data = json.loads(Path(file_path).read_text())
            loader = JSONLoader(
                     file_path=file_path,
                     jq_schema='.quiz',
                     text_content=False)

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quiz
            .json', 'seq_num': 1}

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quizg
            .json', 'seq_num': 1}

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quiz
            .json', 'seq_num': 1}
    N	file_path	jq_schemacontent_keyis_content_key_jq_parsablemetadata_functext_content
json_linesc                     	 ddl }|| _         t        |      j                         | _        |j                  |      | _        || _        || _        || _	        || _
        || _        y# t        $ r t        d      w xY w)a~  Initialize the JSONLoader.

        Args:
            file_path (Union[str, PathLike]): The path to the JSON or JSON Lines file.
            jq_schema (str): The jq schema to use to extract the data or text from
                the JSON.
            content_key (str): The key to use to extract the content from
                the JSON if the jq_schema results to a list of objects (dict).
                If is_content_key_jq_parsable is True, this has to be a jq compatible
                schema. If is_content_key_jq_parsable is False, this should be a simple
                string key.
            is_content_key_jq_parsable (bool): A flag to determine if
                content_key is parsable by jq or not. If True, content_key is
                treated as a jq schema and compiled accordingly. If False or if
                content_key is None, content_key is used as a simple string.
                Default is False.
            metadata_func (Callable[Dict, Dict]): A function that takes in the JSON
                object extracted by the jq_schema and the default metadata and returns
                a dict of the updated metadata.
            text_content (bool): Boolean flag to indicate whether the content is in
                string format, default to True.
            json_lines (bool): Boolean flag to indicate whether the input is in
                JSON Lines format.
        r   Nz=jq package not found, please install it with `pip install jq`)jqImportErrorr   resolver   compile
_jq_schema_is_content_key_jq_parsable_content_key_metadata_func_text_content_json_lines)	selfr   r   r   r   r   r   r   r   s	            n/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_loaders/json_loader.py__init__zJSONLoader.__init__T   s    D	DG i002**Y/+E('+)%  	O 	s   A% %A:returnc              #   v  K   d}| j                   ra| j                  j                  d      5 }|D ]5  }|j                         }|s| j	                  ||      D ]  }| |dz  } 7 	 ddd       y| j	                  | j                  j                  d      |      D ]  }| |dz  } y# 1 sw Y   yxY ww)z-Load and return documents from the JSON file.r   z	utf-8-sig)encoding   N)r    r   openstrip_parse	read_text)r!   indexflinedocs        r"   	lazy_loadzJSONLoader.lazy_load   s     $$k$: 'a 'D::<D#';;tU#; 'C"%I!QJE''' ' {{((+(>  	
	' 's#   +B9B-"B-)AB9-B62B9contentr,   c              #   `  K   | j                   j                  t        j                  |            }| j                  | j                  |       t        ||dz         D ]M  \  }}| j                  |      }| j                  |t        | j                        |      }t        ||       O yw)z#Convert given content to documents.Nr'   )sample)r3   sourceseq_num)page_contentmetadata)r   inputjsonloadsr   _validate_content_key	enumerate	_get_text_get_metadatastrr   r   )r!   r1   r,   datair3   textr7   s           r"   r*   zJSONLoader._parse   s     $$TZZ%89
 (&&t,"43 	AIAv>>>0D))c$..&91 * H x@@	As   B,B.r3   c                    | j                   a| j                  rE| j                  j                  | j                         }|j	                  |      j                         }n|| j                      }n|}| j                  r*t        |t              s|t        dt        |       d      t        |t              r|S t        |t        t        f      r|rt        j                  |      S dS |t        |      S dS )zConvert sample to string formatz%Expected page_content is string, got z instead.                     Set `text_content=False` if the desired input for                     `page_content` is not a string )r   r   r   r   r8   firstr   
isinstancer?   
ValueErrortypedictlistr9   dumps)r!   r3   compiled_content_keyr1   s       r"   r=   zJSONLoader._get_text   s    (//'+wwt7H7H'I$.44V<BBD !2!23Gj#&>7CV7W G3 4  %N$.*14::g&9r9#*#63w<>B>    additional_fieldsc                     | j                   <| j                  ||      }t        |t              st        dt	        |       d      |S |S )z
        Return a metadata dictionary base on the existence of metadata_func
        :param sample: single data payload
        :param additional_fields: key-word arguments to be added as metadata values
        :return:
        zUExpected the metadata_func to return a dict but got                                 ``)r   rF   rI   rG   rH   )r!   r3   rN   results       r"   r>   zJSONLoader._get_metadata   s\     *((1BCFfd+ ""&v,q2  M$$rM   r@   c                    |j                         }t        |t              st        dt	        |       d      | j
                  s4|j                  | j                        t        d| j                   d      | j
                  r\| j                  j                  | j                        j                  |      j                         t        d| j                   d      yy)zCheck if a content key is validztExpected the jq schema to result in a list of objects (dict),                     so sample must be a dict but got `rP   Nz_Expected the jq schema to result in a list of objects (dict)                     with the key `z ` which should be parsable by jq)rE   rF   rI   rG   rH   r   getr   r   r   r8   rB   )r!   r@   r3   s      r"   r;   z JSONLoader._validate_content_key   s     &$'77;F|nAG  00

4,,-5##'#4#4"5Q8 
 ,, 1 1288@EEGO##'#4#4"55UW  P -rM   )NFNTF)__name__
__module____qualname____doc__r
   r?   r   r	   boolr   r   r#   r   r   r0   intr*   r   r=   r>   r;    rM   r"   r   r      s   FX &*5:@D! 1&h'1& 1& c]	1&
 %-TN1&  $t); <=1& 1& 1&f8H- $Ac A# A(82D A"? ? ?4%38n%;>%	c3h%(# $ rM   r   )r9   osr   pathlibr   typingr   r   r   r   r	   r
   langchain_core.documentsr   )langchain_community.document_loaders.baser   r   rZ   rM   r"   <module>r`      s(       A A - @f frM   