
    h<                    4   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ erd dlmZ  ej8                  e      Z	  G d	 d
e      Z	  G d d      Z 	  G d de      Z! G d de      Z"y)    )annotationsN)
HTMLParser)TYPE_CHECKINGAnyDictListOptionalTupleUnion)
BaseLoader)Document)TextSplitter)
Connectionc                  <     e Zd ZdZd fdZddZddZd	dZ xZS )
ParseOracleDocMetadatazParse Oracle doc metadata...c                ^    t         |           | j                          d| _        i | _        y )NF)super__init__resetmatchmetadata)self	__class__s    k/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_loaders/oracleai.pyr   zParseOracleDocMetadata.__init__%   s%    


(*    c                    |dk(  r,d}|D ]$  \  }}|dk(  r|}|dk(  s|s|| j                   |<   & y |dk(  rd| _        y y )Nmeta namecontenttitleT)r   r   )r   tagattrsentryr   values         r   handle_starttagz&ParseOracleDocMetadata.handle_starttag+   s[    &=#%E$ 5e6>!E9$/4e,5 G^DJ r   c                J    | j                   r|| j                  d<   d| _         y y )Nr!   F)r   r   )r   datas     r   handle_dataz"ParseOracleDocMetadata.handle_data7   s#    ::%)DMM'"DJ r   c                    | j                   S N)r   )r   s    r   get_metadataz#ParseOracleDocMetadata.get_metadata<   s    }}r   )returnNone)r"   strr#   zList[Tuple[str, Optional[str]]]r-   r.   )r(   r/   r-   r.   )r-   Dict[str, Any])	__name__
__module____qualname____doc__r   r&   r)   r,   __classcell__r   s   @r   r   r   "   s    &+

r   r   c                  F    e Zd ZdZeddd       Ze	 	 	 	 	 	 	 	 dd       Zy)OracleDocReaderzRead a fileNc                   d}d}| &dj                  t        j                  dd            } t        t	        j                               }t        j                  d|      }t        j                  | j                               j                         }|d | }t        j                  dt        j                  d            }||z   |z   }|j                         }|j                  |      }|d | }|S )N       r   >abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789   )kz>I)joinrandomchoicesinttimestructpackhashlibsha256encodedigestgetrandbitshexzfill)	input_string
out_lengthhash_len	timestamptimestamp_binhashval_bincounter_bin	object_idobject_id_hexs	            r   generate_object_idz"OracleDocReader.generate_object_idF   s    
77TL 		$	D)4 nn\%8%8%:;BBD!)8, kk$(:(:2(>? "K/+=	!%++
 &kz2r   c                4   i }	 ddl }	 d|j                  _        | j	                         }t        |d      5 }|j                         }ddd       t        d|      S |j                  |j                        }	|j                  |j                        }
|j                  d|t        j                  |      |	|
	       |j                          |	i }nft        |	j                               }|j!                  d
      s|j!                  d      r+t#               }|j%                  |       |j'                         }t(        j+                  | j,                  dz   |z         }||d<   ||d<   |
t        d|      S t        t        |
j                               |      S # t        $ r}t        d      |d}~ww xY w# 1 sw Y   rxY w# t.        $ rJ}t0        j3                  d|        t0        j3                  d|        j                          Y d}~yd}~ww xY w)zRead a file using OracleReader
        Args:
            conn: Oracle Connection,
            file_path: Oracle Directory,
            params: ONNX file name.
        Returns:
            Plain text and metadata as Langchain Document.
        r   NIUnable to import oracledb, please install with `pip install -U oracledb`.Frbr   page_contentr   a  
                declare
                    input blob;
                begin
                    input := :blob;
                    :mdata := dbms_vector_chain.utl_to_text(input, json(:pref));
                    :text := dbms_vector_chain.utl_to_text(input);
                end;)blobprefmdatatext<!DOCTYPE html<HTML>$_oid_fileAn exception occurred :: zSkip processing )oracledbImportErrordefaults
fetch_lobscursoropenreadr   varDB_TYPE_CLOBexecutejsondumpscloser/   getvalue
startswithr   feedr,   r8   rV   username	Exceptionloggerinfo)conn	file_pathparamsr   rf   erj   fr(   r^   r_   doc_datapdoc_idexs                  r   	read_filezOracleDocReader.read_filei   s    $&	4	+0H([[]Fi&  !vvx  |R(CCJJx445E::h334DNN ZZ'   LLN}u~~/0&&'78H<O<O= /0AFF8$ ~~/H$778Ki8WXF%HV )HW|R(CCS-AHUUg  	- 	   X  	KK3B489KK*9+67LLN		sS   F -G F7G DG 6#G 	F4#F//F47G<G 	HA HHr+   )rM   zUnion[str, None]r-   r/   )rz   r   r{   r/   r|   dictr-   zUnion[Document, None])r1   r2   r3   r4   staticmethodrV   r    r   r   r8   r8   C   sR       D HH%(H26H	H Hr   r8   c                  ,     e Zd ZdZd fdZddZ xZS )OracleDocLoaderzwRead documents using OracleDocLoader
    Args:
        conn: Oracle Connection,
        params: Loader parameters.
    c                    || _         t        j                  t        j                  |            | _        t        |   di | y )Nr   )rz   rp   loadsrq   r|   r   r   )r   rz   r|   kwargsr   s       r   r   zOracleDocLoader.__init__   s3    	jjF!34"6"r   c           	        	 ddl }d}g }i }ddi}	 | j                  | j                  j                  d      | _        | j                  j                  d      | _        | j                  j                  d      | _        | j                  j                  d	      | _        | j                  j                  d
      | _        nt        d      d|j                  _        | j                  r@t        j                  | j                  | j                  |      }||S |j                  |       | j
                  rd}t!        j"                  | j
                        D ]  }	t         j$                  j'                  | j
                  |	      }
t         j$                  j)                  |
      sMt        j                  | j                  |
|      }||dz   }t*        j-                  d| d       |j                  |        | j                  r	 | j                  | j                  t        d      | j                  j/                         }| j                  j                  d      | _        | j0                  t3        | j0                        dkD  rt        d      d}|j5                  || j                  j7                         | j                  j7                                |j9                         }|D ]&  }|d   | j0                  v s|d   dvst        d       d| _        | j0                  (| j0                  D ]  }| j:                  dz   |z   | _         d| j                  z   dz   t=        j>                  |      z   dz   | j                  z   dz   | j:                  z   dz   | j                  z   dz   | j                  z   d z   }|j5                  |       |D ]  }i }|{t        jA                  | j                  jB                  d!z   | j                  z   d!z   | j                  z   d!z   | j                  z         }||d"<   |j                  tE        d#|$             |d   [tG        |d         }|jI                  d%      s|jI                  d&      r+tK               }|jM                  |       |jO                         }t        jA                  | j                  jB                  d!z   | j                  z   d!z   | j                  z   d!z   | j                  z   d!z   tG        |d'         z         }||d"<   |d'   |d(<   | j0                  t3        | j0                        }tQ        d|      D ]  }||d'z      || j0                  |   <    |d   |j                  tE        d#|$             |j                  tE        tG        |d         |$              	 |S |S # t        $ r}t        d      |d}~ww xY w# t        $ rB}t*        j-                  d)|        tS        jT                          jW                           d}~ww xY w# t        $ r2}t*        j-                  d)|        tS        jT                           d}~ww xY w)*z,Load data into LangChain Document objects...r   NrX   	plaintextfalsefiledirowner	tablenamecolnamezMissing loader parametersF   zTotal skipped: 
z%Missing owner or column name or both.
mdata_cols   z?Exceeds the max number of columns you can request for metadata.zgselect column_name, data_type from all_tab_columns where owner = :ownername and table_name = :tablename)	ownernamer   )NUMBERBINARY_DOUBLEBINARY_FLOATLONGDATE	TIMESTAMPVARCHAR2zDThe datatype for the column requested for metadata is not supported.z, rowidz, z'select dbms_vector_chain.utl_to_text(t.z, json('z+')) mdata, dbms_vector_chain.utl_to_text(t.z) textz from .z trb   rc   r   rZ   r`   ra      _rowidre   ),rf   rg   r|   getr   r   r   r   r   rw   rh   ri   r8   r   rz   appendoslistdirpathr?   isfilerx   ry   rj   r   lenro   upperfetchallmdata_cols_sqlrp   rq   rV   rv   r   r/   rt   r   ru   r,   range	traceback	print_excrr   )r   rf   r}   ncolsresultsr   m_paramsdoc
skip_count	file_namer{   rj   sqlrowsrowcolr   r(   r   ir   s                        r   loadzOracleDocLoader.load   sD   	 "$#%)g	{{& KKOOF3	;;??51![[__W5
!%!=#{{y9 ;<<+0H(yy%//		499hO;"Ns#xx
!#DHH!5 	0I "TXXy AIww~~i0-77		9hW;)3aJ"KK/*R(HI#NN3/	0 ~~~zz)T\\-A'(OPP!YY--/F&*kkool&CDO2t/!3"+!B# 8 
 &*jj&6&6&8&*nn&:&:&< '   &0#' &C"1v8#&q6 2" $" +4)K+& %&&  +4D'2#'?? SC262E2E2Ls2RD/S
 B,,'$% **X./ H	H
 ,,' ## --. ## **	% 
 ..)    NN3'% 7"#%;%4%G%G $		 2 2"%!&"&**!- #&!& #'..	!1
 #&!& #',,!/&F 06HV,#NN8h+WX"1v1'*3q6{#'??3C#D$,I" )?(@A$%FF4L/0~~/?H%4%G%G $		 2 2"%!&"&**!- #&!& #'..	!1
 #&!& #',,!/ #&!& #&c!f+!.
&F 06HV,14QHX.  $:(+DOO(<%*1e_ J?B1q5z); <J  #1v~ '$,"x$P!" !($,58Q[8%&!"g7"| N7N[  	- 	N ! KK";B4 @A'')LLN	  	KK3B489!	su   W DX, B
X,  A"X, C8W <W J8W =X, ?X, 	W
WW	X)'=X$$X))X, ,	Y'5-Y""Y')rz   r   r|   r0   r   r   )r-   zList[Document])r1   r2   r3   r4   r   r   r5   r6   s   @r   r   r      s    #
ur   r   c                  ,     e Zd ZdZd fdZddZ xZS )OracleTextSplitterz$Splitting text using Oracle chunker.c                    || _         || _        t        |   di | 	 ddl}	 ddl}|| _        || _        y# t        $ r}t        d      |d}~ww xY w# t        $ r t        d      w xY w)zInitialize.r   NrX   zjoracledb or json or both are not installed. Please install them. Recommendations: `pip install oracledb`. r   )	rz   r|   r   r   rp   rf   rg   	_oracledb_json)r   rz   r|   r   rp   rf   r}   r   s          r   r   zOracleTextSplitter.__init__  s    	"6"	 &DNDJ  !1   	> 	s+   A 8 A 	AAAA A*c                j   	 ddl }g }	 d| j                  j                  _        | j
                  j                         }|j                  |j                         |j                  d|| j                  j                  | j                               	 |j                         }|	 |S | j                  j                  |d         }|j                  |d          H# t        $ r}t        d      |d}~ww xY w# t         $ r2}t"        j%                  d	|        t'        j(                           d}~ww xY w)
z&Split incoming text and return chunks.r   NrX   F)r    zUselect t.column_value from dbms_vector_chain.utl_to_chunks(:content, json(:params)) t)r    r|   
chunk_datare   )rf   rg   r   rh   ri   rz   rj   setinputsizesCLOBro   r   rq   r|   fetchoner   r   rw   rx   ry   r   r   )	r   r_   rf   r}   splitsrj   r   dr   s	            r   
split_textzOracleTextSplitter.split_text  s+   	 	16DNN##.YY%%'F   7NNOzz''4	   oo'; M JJ$$SV,ao. -  	- 	>  	KK3B489!	s5   C BC7 '3C7 	C4#C//C47	D2 -D--D2)rz   r   r|   r0   r   r   r-   r.   )r_   r/   r-   z	List[str])r1   r2   r3   r4   r   r   r5   r6   s   @r   r   r   |  s    .2'r   r   )#
__future__r   rF   rp   loggingr   r@   rD   rC   r   html.parserr   typingr   r   r   r   r	   r
   r   langchain_core.document_loadersr   langchain_core.documentsr   langchain_text_splittersr   rf   r   	getLoggerr1   rx   r   r8   r   r   r   r   r   <module>r      s    #    	     " I I I 6 - 1#			8	$ "Z < o od Aj AHC Cr   