
    hS                         d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ  G d	 d
e      Z G d de      Zy)    N)TextIOWrapper)Path)AnyDictIteratorListOptionalSequenceUnion)Document)
BaseLoader)detect_file_encodings)UnstructuredFileLoadervalidate_unstructured_versionc                       e Zd ZdZ	 	 	 	 	 ddddeeef   dee   dee   dee	   d	ee   d
e
dee   fdZdee   fdZdedee   fdZy)	CSVLoadera  Load a `CSV` file into a list of Documents.

    Each document represents one row of the CSV file. Every row is converted
    into a key/value pair and outputted to a new line in the document's
    page_content.

    The source for each document loaded from csv is set to the value of the
    `file_path` argument for all documents by default.
    You can override this by setting the `source_column` argument to the
    name of a column in the CSV file.
    The source of each document will then be set to the value of the column
    with the name specified in `source_column`.

    Output Example:
        .. code-block:: txt

            column1: value1
            column2: value2
            column3: value3

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import CSVLoader

            loader = CSVLoader(file_path='./hw_200.csv',
                csv_args={
                'delimiter': ',',
                'quotechar': '"',
                'fieldnames': ['Index', 'Height', 'Weight']
            })

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}
    N )content_columns	file_pathsource_columnmetadata_columnscsv_argsencodingautodetect_encodingr   c                n    || _         || _        || _        || _        |xs i | _        || _        || _        y)a  

        Args:
            file_path: The path to the CSV file.
            source_column: The name of the column in the CSV file to use as the source.
              Optional. Defaults to None.
            metadata_columns: A sequence of column names to use as metadata. Optional.
            csv_args: A dictionary of arguments to pass to the csv.DictReader.
              Optional. Defaults to None.
            encoding: The encoding of the CSV file. Optional. Defaults to None.
            autodetect_encoding: Whether to try to autodetect the file encoding.
            content_columns: A sequence of column names to use for the document content.
                If not present, use all columns that are not part of the metadata.
        N)r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   s           m/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_loaders/csv_loader.py__init__zCSVLoader.__init__c   s=    2 #* 0  B#6 .    returnc              #   ~  K   	 t        | j                  d| j                        5 }| j                  |      E d {    d d d        y 7 # 1 sw Y   y xY w# t        $ r}| j
                  rt        | j                        }|D ]f  }	 t        | j                  d|j                        5 }| j                  |      E d {  7   	 d d d         n<# 1 sw Y   nxY wY# t        $ r Y dw xY w nt        d| j                         |Y d }~y Y d }~y d }~wt        $ r}t        d| j                         |d }~ww xY ww)N )newliner   zError loading )	openr   r   _CSVLoader__read_fileUnicodeDecodeErrorr   r   RuntimeError	Exception)r   csvfileedetected_encodingsr   s        r   	lazy_loadzCSVLoader.lazy_load   s;    	Idnnb4==I 5W++G4445 545 5! 	M''%:4>>%J" 2 !H!! NNBARAR "$'+'7'7'@@@!	" " " "
 . ! !! #^DNN3C#DE1L!"  	I/?@AqH	Is   D="A AA	A A D=	AAA D=A 	D: &D"C)C>C
?CCDCCD	C(%D'C((D
D=D:D55D::D=r)   c              #      K   t        j                  |fi  j                  }t        |      D ]  \  }}	  j                  | j                     nt         j                        }dj                   fd|j                         D              }||d} j                  D ]  }	 ||   ||<    t        ||        y # t        $ r t        d j                   d      w xY w# t        $ r t        d| d      w xY ww)NzSource column 'z' not found in CSV file.
c           	   3   Z  K   | ]  \  }}j                   r|j                   v rn|j                  vrt||j                         n| dt        |t              r|j                         n:t        |t
              r)dj                  t        t        j                  |            n|   y w)Nz: ,)r   r   strip
isinstancestrlistjoinmap).0kvr   s      r   	<genexpr>z(CSVLoader.__read_file.<locals>.<genexpr>   s        Aq ++ ---$"7"77 #$-QWWYQ7r!!S) GGI "!T* #cii"34:  s   B(B+)sourcerowzMetadata column ')page_contentmetadata)csv
DictReaderr   	enumerater   r3   r   KeyError
ValueErrorr5   itemsr   r   )	r   r)   
csv_readerir<   r;   contentr>   cols	   `        r   __read_filezCSVLoader.__read_file   s0    ^^G=t}}=

+  	DFAs	 ))5 **+T^^,  ii    IIK  G #)3H,, XX$'HHSMX
 (CCA 	D   %d&8&8%99QR .   X$'8=U%VWWXs4   2D0B?&<D#C%+D?#C""D%C>>D)Nr   NNF)__name__
__module____qualname____doc__r   r3   r   r	   r
   r   boolr   r   r   r,   r   r%   r   r   r   r   r      s    Pj (,*,#'"&$)/ *,/d#/  }/ #3-	/
 4./ 3-/ "/ "#/BI8H- I*"D= "DXh5G "Dr   r   c                   @     e Zd ZdZ	 ddededef fdZdefdZ xZ	S )	UnstructuredCSVLoadera|  Load `CSV` files using `Unstructured`.

    Like other
    Unstructured loaders, UnstructuredCSVLoader can be used in both
    "single" and "elements" mode. If you use the loader in "elements"
    mode, the CSV file will be a single Unstructured Table element.
    If you use the loader in "elements" mode, an HTML representation
    of the table will be available in the "text_as_html" key in the
    document metadata.

    Examples
    --------
    from langchain_community.document_loaders.csv_loader import UnstructuredCSVLoader

    loader = UnstructuredCSVLoader("stanley-cups.csv", mode="elements")
    docs = loader.load()
    r   modeunstructured_kwargsc                 B    t        d       t        |   d||d| y)a  

        Args:
            file_path: The path to the CSV file.
            mode: The mode to use when loading the CSV file.
              Optional. Defaults to "single".
            **unstructured_kwargs: Keyword arguments to pass to unstructured.
        z0.6.8)min_unstructured_version)r   rQ   Nr   )r   superr   )r   r   rQ   rR   	__class__s       r   r   zUnstructuredCSVLoader.__init__   s%     	&wGO94O;NOr   r    c                 J    ddl m}  |dd| j                  i| j                  S )Nr   )partition_csvfilenamer   )unstructured.partition.csvrX   r   rR   )r   rX   s     r   _get_elementsz#UnstructuredCSVLoader._get_elements   s"    <QdnnQ8P8PQQr   )single)
rJ   rK   rL   rM   r3   r   r   r   r[   __classcell__)rV   s   @r   rP   rP      s<    & +3PP$'PKNPRt Rr   rP   )r?   ior   pathlibr   typingr   r   r   r   r	   r
   r   langchain_core.documentsr   )langchain_community.document_loaders.baser   ,langchain_community.document_loaders.helpersr   1langchain_community.document_loaders.unstructuredr   r   r   rP   r   r   r   <module>re      sE    
   G G G - @ NkD
 kD\$R2 $Rr   