
    h=0                     T   U d Z ddlZddlZddlZddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ dZdZ G d de      Z G d de      Z G d de      Z  G d de      Z!ee e!dZ"e	e#ee   f   e$d<    G d de%      Z& G d de      Z'y)zWrapper around scikit-learn NearestNeighbors implementation.

The vector store can be persisted in json, bson or parquet format.
    N)ABCabstractmethod)AnyDictIterableListLiteralOptionalTupleType)uuid4)Document)
Embeddings)guard_import)VectorStore)maximal_marginal_relevance      c                   p    e Zd ZdZdeddfdZeedefd              Zede	ddfd       Z
ede	fd	       Zy)
BaseSerializerz Base class for serializing data.persist_pathreturnNc                     || _         y Nr   )selfr   s     f/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/sklearn.py__init__zBaseSerializer.__init__   s
    (    c                      y)z>The file extension suggested by this serializer (without dot).N clss    r   	extensionzBaseSerializer.extension       r   datac                      y)z"Saves the data to the persist_pathNr!   r   r&   s     r   savezBaseSerializer.save#   r%   r   c                      y)z$Loads the data from the persist_pathNr!   r   s    r   loadzBaseSerializer.load'   r%   r   )__name__
__module____qualname____doc__strr   classmethodr   r$   r   r)   r,   r!   r   r   r   r      s{    *)S )T ) M# M  M 1 1 1 1 3c 3 3r   r   c                   B    e Zd ZdZedefd       ZdeddfdZdefdZ	y)JsonSerializerzKSerialize data in JSON using the json package from python standard library.r   c                      y)Njsonr!   r"   s    r   r$   zJsonSerializer.extension/       r   r&   Nc                     t        | j                  d      5 }t        j                  ||       d d d        y # 1 sw Y   y xY w)Nw)openr   r6   dumpr   r&   fps      r   r)   zJsonSerializer.save3   s7    $##S) 	 RIIdB	  	  	 s	   7A c                     t        | j                  d      5 }t        j                  |      cd d d        S # 1 sw Y   y xY w)Nr)r:   r   r6   r,   r   r=   s     r   r,   zJsonSerializer.load7   s4    $##S) 	!R99R=	! 	! 	!s   6?)
r-   r.   r/   r0   r2   r1   r$   r   r)   r,   r!   r   r   r4   r4   ,   s=    U#       !c !r   r4   c                   ^     e Zd ZdZdeddf fdZedefd       ZdeddfdZ	defd	Z
 xZS )
BsonSerializerz>Serialize data in Binary JSON using the `bson` python package.r   r   Nc                 D    t         |   |       t        d      | _        y Nbson)superr   r   rE   r   r   	__class__s     r   r   zBsonSerializer.__init__?   s    & (	r   c                      yrD   r!   r"   s    r   r$   zBsonSerializer.extensionC   r7   r   r&   c                     t        | j                  d      5 }|j                  | j                  j	                  |             d d d        y # 1 sw Y   y xY w)Nwb)r:   r   writerE   dumpsr<   s      r   r)   zBsonSerializer.saveG   sB    $##T* 	,bHHTYY__T*+	, 	, 	,s   +AAc                     t        | j                  d      5 }| j                  j                  |j	                               cd d d        S # 1 sw Y   y xY w)Nrb)r:   r   rE   loadsreadr@   s     r   r,   zBsonSerializer.loadK   s?    $##T* 	.b99??2779-	. 	. 	.s   )A

Ar-   r.   r/   r0   r1   r   r2   r$   r   r)   r,   __classcell__rH   s   @r   rB   rB   <   sS    H)S )T ) #  , , ,.c .r   rB   c                   ^     e Zd ZdZdeddf fdZedefd       ZdeddfdZ	defd	Z
 xZS )
ParquetSerializerzFSerialize data in `Apache Parquet` format using the `pyarrow` package.r   r   Nc                     t         |   |       t        d      | _        t        d      | _        t        d      | _        y )Npandaspyarrowzpyarrow.parquet)rF   r   r   pdpapqrG   s     r   r   zParquetSerializer.__init__S   s5    &x(y)01r   c                      y)Nparquetr!   r"   s    r   r$   zParquetSerializer.extensionY   s    r   r&   c                 r   | j                   j                  |      }| j                  j                  j	                  |      }t
        j                  j                  | j                        rut        | j                        dz   }t        j                  | j                  |       	 | j                  j                  || j                         t        j                  |       y | j                  j                  || j                         y # t        $ r'}t        j                  || j                         |d }~ww xY w)Nz-backup)rZ   	DataFramer[   Tablefrom_pandasospathexistsr   r1   renamer\   write_tableremove	Exception)r   r&   dftablebackup_pathexcs         r   r)   zParquetSerializer.save]   s    WWt$))"-77>>$++,d//09<KIId''5'##E4+<+<=
 		+&GGt'8'89  		+t'8'89	s   #&D 	D6"D11D6c                     | j                   j                  | j                        }|j                         }|j	                         D ci c]  \  }}||j                          c}}S c c}}w r   )r\   
read_tabler   	to_pandasitemstolist)r   rk   rj   colseriess        r   r,   zParquetSerializer.loadm   sQ    ""4#4#45__8:
CfV]]_$CCCs   	A(rR   rT   s   @r   rV   rV   P   sU    P2S 2T 2 #  : : : Dc Dr   rV   r6   rE   r^   SERIALIZER_MAPc                       e Zd ZdZy)SKLearnVectorStoreExceptionz'Exception raised by SKLearnVectorStore.N)r-   r.   r/   r0   r!   r   r   rx   rx   z   s    1r   rx   c                   &   e Zd ZdZdddddedee   ded	   d
ededdfdZ	e
defd       Zd$dZd$dZ	 	 d%dee   deee      deee      dedee   f
dZd$dZeddee   dededeeeef      fdZeddedededeeeef      fdZefdedededee   fdZefdedededeeeef      fdZeedfdee   deded ededee   fd!Zeedfdededed ededee   fd"Ze	 	 	 d&dee   dedeee      deee      dee   dedd fd#       Z y)'SKLearnVectorStorezYSimple in-memory vector store based on the `scikit-learn` library
    `NearestNeighbors`.Nr6   cosine)r   
serializermetric	embeddingr   r|   ru   r}   kwargsr   c                   t        d      }t        dd      }|| _         |j                  dd|i|| _        d| _        || _        || _        d | _        | j                  !t        |   } || j                        | _        g | _	        g | _
        g | _        g | _        |j                  g       | _        | j                  ;t        j                   j#                  | j                        r| j%                          y y y )	Nnumpyzsklearn.neighborszscikit-learn)pip_namer}   Fr   r!   )r   _npNearestNeighbors
_neighbors_neighbors_fitted_embedding_function_persist_path_serializerrv   _embeddings_texts
_metadatas_idsasarray_embeddings_nprc   rd   isfile_load)	r   r~   r   r|   r}   r   npsklearn_neighborsserializer_clss	            r   r   zSKLearnVectorStore.__init__   s     '"()<~V <+<<UFUfU!&#, )59)+J7N-4;M;MND /1!#&(!	 $&::b>)bggnnT=O=O.PJJL /Q)r   c                     | j                   S r   )r   r+   s    r   
embeddingszSKLearnVectorStore.embeddings   s    '''r   c                     | j                   t        d      | j                  | j                  | j                  | j
                  d}| j                   j                  |       y )NzFYou must specify a persist_path on creation to persist the collection.)idstexts	metadatasr   )r   rx   r   r   r   r   r)   r(   s     r   persistzSKLearnVectorStore.persist   s[    #-X  99[[**	
 	d#r   c                     | j                   t        d      | j                   j                         }|d   | _        |d   | _        |d   | _        |d   | _        | j                          y )NzCYou must specify a persist_path on creation to load the collection.r   r   r   r   )r   rx   r,   r   r   r   r   _update_neighborsr(   s     r   r   zSKLearnVectorStore._load   so    #-U  $$&-7m{+K	 r   r   r   r   c                    t        |      }|xs! |D cg c]  }t        t                      c}}| j                  j	                  |       | j
                  j	                  | j                  j                  |             | j                  j	                  |xs i gt        |      z         | j                  j	                  |       | j                          |S c c}w r   )listr1   r   r   extendr   r   embed_documentsr   lenr   r   )r   r   r   r   r   r   _r   s           r   	add_textszSKLearnVectorStore.add_texts   s     e4V4s57|46" 8 8 H H PQy@bTCK-?A		  5s   Cc                     t        | j                        dk(  rt        d      | j                  j	                  | j                        | _        | j                  j                  | j
                         d| _        y )Nr   (No data was added to SKLearnVectorStore.T)	r   r   rx   r   r   r   r   fitr   r+   s    r   r   z$SKLearnVectorStore._update_neighbors   sd    t A%-:  #hh..t/?/?@D//0!%r   )kquery_embeddingr   c                    | j                   st        d      | j                  j                  |g|      \  }}t	        t        |d   |d               S )zgSearch k embeddings similar to the query embedding. Returns a list of
        (index, distance) tuples.r   )n_neighborsr   )r   rx   r   
kneighborsr   zip)r   r   r   r   neigh_dists
neigh_idxss         r   #_similarity_index_search_with_scorez6SKLearnVectorStore._similarity_index_search_with_score   sa    
 %%-:  #'//"<"<1 #= #
Z C
1{1~677r   queryc          
         | j                   j                  |      } | j                  |fd|i|}|D cg c]?  \  }}t        | j                  |   d| j
                  |   i| j                  |         |fA c}}S c c}}w )Nr   idpage_contentmetadata)r   embed_queryr   r   r   r   r   )r   r   r   r   r   indices_distsidxdists           r   similarity_search_with_scorez/SKLearnVectorStore.similarity_search_with_score   s     22>>uE@@@
 
$*
 +	
 T !%S!1"DIIcNKdooc6JK 	
 		
 	
s   AA>c                 b     | j                   |fd|i|}|D cg c]  \  }}|	 c}}S c c}}w )Nr   )r   )r   r   r   r   docs_scoresdocr   s          r   similarity_searchz$SKLearnVectorStore.similarity_search   s8     8d77MMfM"-.Q...s   +c                      | j                   |fd|i|}t        | \  }}|D cg c]  }dt        j                  |      z   }}t	        t        t	        |      |            S c c}w )Nr      )r   r   mathexpr   )	r   r   r   r   
docs_distsdocsdistsr   scoress	            r   (_similarity_search_with_relevance_scoresz;SKLearnVectorStore._similarity_search_with_relevance_scores  si     7T66uLLVL
:&e167!dhhtn$77CT
F+,, 8s   A$g      ?fetch_klambda_multc           	          | j                   |fd|i|}t        | \  }}| j                  |f   }	t        | j                  j                  || j                  j                        |	||      }
|
D cg c]  }||   	 }}|D cg c]:  }t        | j                  |   d| j                  |   i| j                  |         < c}S c c}w c c}w )a  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.
        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   )dtype)r   r   r   r   )r   r   r   r   r   arrayfloat32r   r   r   r   )r   r~   r   r   r   r   r   indicesr   result_embeddingsmmr_selectedimmr_indicesr   s                 r   'max_marginal_relevance_search_by_vectorz:SKLearnVectorStore.max_marginal_relevance_search_by_vector  s    , A@@
 
$*
 -(
 //91HHNN9DHH,<,<N=#	
 ,88awqz88 #

 	 ![[-		#G$//#2FG
 	
 9
s   3C?Cc                     | j                   t        d      | j                   j                  |      }| j                  ||||      }|S )a  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.
        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        zCFor MMR search, you must specify an embedding function on creation.)
lambda_mul)r   
ValueErrorr   r   )r   r   r   r   r   r   r~   r   s           r   max_marginal_relevance_searchz0SKLearnVectorStore.max_marginal_relevance_search5  s]    , ##+U  ,,88?	;;q'k < 
 r   c                 J    t        |fd|i|}|j                  |||       |S )Nr   )r   r   )rz   r   )r#   r   r~   r   r   r   r   vss           r   
from_textszSKLearnVectorStore.from_textsV  s/      	OOO
UiS9	r   )r   N)NN)NNN)!r-   r.   r/   r0   r   r
   r1   r	   r   r   propertyr   r   r   r   r   dictr   r   	DEFAULT_Kfloatintr   r   r   r   r   r   DEFAULT_FETCH_Kr   r   r2   r   r!   r   r   rz   rz      s    '+9?!! sm	!
 56! ! ! 
!F (J ( ($
! +/#'	} DJ' d3i 	
  
c & 9B8#E{8258MP8	eCJ	 8 '0

 #
;>
	eHeO$	%
& $-// /8;/	h/ $--- -8;-	eHeO$	%- & (
;(
 (
 	(

 (
 (
 
h(
Z &   	
   
hB 
 +/#'&*Cy  DJ'	
 d3i  sm  
 r   rz   )(r0   r6   r   rc   abcr   r   typingr   r   r   r   r	   r
   r   r   uuidr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   &langchain_community.vectorstores.utilsr   r   r   r   r4   rB   rV   rv   r1   __annotations__RuntimeErrorrx   rz   r!   r   r   <module>r      s   
   	 # L L L  - 0 - 3 M	3S 3(!^ ! .^ .( D  DH  3S$~../ 	, 	b br   