
    hR                       d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZ erd dlmZ d d	lmZmZ  ej>                  e       Z!dd
Z" edd      Z# G d de      Z$y)    )annotationsN)	TYPE_CHECKINGAnyCallableIterableListOptionalTupleTypeVarUnion)Document)
Embeddings)VectorStore)DistanceStrategymaximal_marginal_relevanceClient)NeighborVectorDistanceMetricc                 N    	 ddl m}  | S # t        $ r}t        d      |d }~ww xY w)Nr   r   zoCould not import aerospike_vector_search python package. Please install it with `pip install aerospike_vector`.)aerospike_vector_searchr   ImportError)r   es     h/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/aerospike.py_import_aerospiker   #   s;    2 M  E
 	s   
 	$$AVST	Aerospike)boundc                  N   e Zd ZdZdddddej
                  f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZedd       ZddZ	dd	Z
e	 	 	 	 dd
       Z	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 d	 	 	 	 	 	 	 ddZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d dZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d!dZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d"dZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d#dZd$dZed%d       Z	 	 	 	 	 d&	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'dZ	 	 	 	 	 d&	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d(dZe	 	 	 	 	 	 	 d)	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d*d       Zy)+r   zu`Aerospike` vector store.

    To use, you should have the ``aerospike_vector_search`` python package installed.
    N_vector_text_idc
                D   t               }
t        |t              st        j                  d       t        ||
      st        dt        |             || _        || _        || _	        || _
        || _        || _        || _        || _        | j                  |	      | _        y)a  Initialize with Aerospike client.

        Args:
            client: Aerospike client.
            embedding: Embeddings object or Callable (deprecated) to embed text.
            namespace: Namespace to use for storing vectors. This should match
            index_name: Name of the index previously created in Aerospike. This
            vector_key: Key to use for vector in metadata. This should match the
                key used during index creation.
            text_key: Key to use for text in metadata.
            id_key: Key to use for id in metadata.
            set_name: Default set name to use for storing vectors.
            distance_strategy: Distance strategy to use for similarity search
                This should match the distance strategy used during index creation.
        z`Passing in `embedding` as a Callable is deprecated. Please pass in an Embeddings object instead.zDclient should be an instance of aerospike_vector_search.Client, got N)r   
isinstancer   warningswarn
ValueErrortype_client
_embedding	_text_key_vector_key_id_key_index_name
_namespace	_set_nameconvert_distance_strategy_distance_strategy)selfclient	embedding	namespace
index_name
vector_keytext_keyid_keyset_namedistance_strategy	aerospikes              r   __init__zAerospike.__init__7   s    < &'	)Z0MM.
 &),F|n& 
 #!%%#!"&"@"@AR"S    c                P    t        | j                  t              r| j                  S y)z/Access the query embedding object if available.N)r$   r*   r   r3   s    r   
embeddingszAerospike.embeddingsm   s     dooz2??"r?   c                    t        | j                  t              r$| j                  j                  t	        |            S |D cg c]  }| j                  |       c}S c c}w )zEmbed search docs.)r$   r*   r   embed_documentslist)r3   textsts      r   _embed_documentszAerospike._embed_documentst   sF    dooz2??224;??,12q"222s   Ac                    t        | j                  t              r| j                  j                  |      S | j                  |      S )zEmbed query text.)r$   r*   r   embed_query)r3   texts     r   _embed_queryzAerospike._embed_queryz   s4    dooz2??..t44t$$r?   c                   ddl m} t        | t              r| S | |j                  k(  rt        j                  S | |j
                  k(  rt        j
                  S | |j                  k(  rt        j                  S t        d      )z
        Convert Aerospikes distance strategy to langchains DistanceStrategy
        enum. This is a convenience method to allow users to pass in the same
        distance metric used to create the index.
        r   )r   DUnknown distance strategy, must be cosine, dot_product, or euclidean)	aerospike_vector_search.typesr   r$   r   COSINEDOT_PRODUCTSQUARED_EUCLIDEANEUCLIDEAN_DISTANCEr'   )r<   r   s     r   r1   z#Aerospike.convert_distance_strategy   s{     	G')9:$$ 4 ; ;;#*** 4 @ @@#/// 4 F FF#666R
 	
r?   c           
     4   || j                   }|| j                  }|r|t        d      t        |      }|xs+ |D 	cg c]  }	t	        t        j                               ! c}	}|r|D 
cg c]  }
|
j                          }}
n|xs |D 	cg c]  }	i  c}	}t        dt        |      |      D ]  }||||z    }||||z    }||||z    }| j                  |      }t        |||      D ]$  \  }}}||| j                  <   ||| j                  <   & t        ||      D ]?  \  }}||| j                  <    | j                  j                   d| j"                  |||d| A  |r'| j                  j%                  | j"                  |       |S c c}	w c c}
w c c}	w )a  Run more texts through the embeddings and add to the vectorstore.


        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadata associated with the texts.
            ids: Optional list of ids to associate with the texts.
            set_name: Optional aerospike set name to add the texts to.
            batch_size: Batch size to use when adding the texts to the vectorstore.
            embedding_chunk_size: Chunk size to use when embedding the texts.
            index_name: Optional aerospike index name used for waiting for index
                completion. If not provided, the default index_name will be used.
            wait_for_index: If True, wait for the all the texts to be indexed
                before returning. Requires index_name to be provided. Defaults
                to True.
            kwargs: Additional keyword arguments to pass to the client upsert call.

        Returns:
            List of ids from adding the texts into the vectorstore.

        z6if wait_for_index is True, index_name must be providedr   )r6   keyr;   record_data)r6   name )r0   r.   r'   rE   struuiduuid4copyrangelenrH   zipr,   r+   r-   r)   upsertr/   wait_for_index_completion)r3   rF   	metadatasidsr;   embedding_chunk_sizer7   wait_for_indexkwargs_michunk_texts	chunk_idschunk_metadatasrB   metadatar5   rK   ids                       r   	add_textszAerospike.add_texts   s   @ ~~H))Jj0UVVU77Ac$**,'7 +45a5I5!8%%8Qb%8Iq#e*&:; 	AA(<$<=KA$8 89I'A0D,DEO..{;J-0[. 0))T .7))*+/(	0 !$I ? H)+&### "oo% (	
 	, LL22// 3 
 
I 8 6%8s   $F,F	Fc                    ddl m} |r2|D ]-  }	  | j                  j                  d| j                  ||d| / y# |$ r Y  yw xY w)a7  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments to pass to client delete call.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        r   )AVSServerError)r6   rU   r;   FTrX   )r   rq   r)   deleter/   )r3   rc   r;   rf   rq   rn   s         r   rr   zAerospike.delete   sg      	; 	!!'DLL'' "&//!) !		!  & ! !s   *<AAc                N     | j                   | j                  |      f|||d|S )a  Return aerospike documents most similar to query, along with scores.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Name of the index to search. Overrides the default
                index_name.
            kwargs: Additional keyword arguments to pass to the search method.

        Returns:
            List of Documents most similar to the query and associated scores.
        kmetadata_keysr7   )&similarity_search_by_vector_with_scorerL   )r3   queryru   rv   r7   rf   s         r   similarity_search_with_scorez&Aerospike.similarity_search_with_score  s?    . ;t::e$
'!	

 
 	
r?   c           	        g }|r| j                   |vr| j                   g|z   }|| j                  }|t        d       | j                  j                  d|| j
                  |||d|}|D ]  }|j                  }	| j                   |	v rF|	j                  | j                         }
|j                  }|j                  t        |
|	      |f       ct        j                  d| j                    d        |S )a  Return aerospike documents most similar to embedding, along with scores.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Name of the index to search. Overrides the default
                index_name.
            kwargs: Additional keyword arguments to pass to the client
                vector_search method.

        Returns:
            List of Documents most similar to the query and associated scores.

        zindex_name must be provided)r7   r6   rx   limitfield_names)page_contentrm   zFound document with no `z` key. Skipping.rX   )r+   r.   r'   r)   vector_searchr/   fieldspopdistanceappendr   loggerwarning)r3   r5   ru   rv   r7   rf   docsresultsresultrm   rK   scores               r   rw   z0Aerospike.similarity_search_by_vector_with_score*  s   2 T^^=@!^^,}<M))J:;;"<$,,"<"< #
!oo%#
 #
  	F}}H~~)||DNN3X4(KUST.t~~.>>NO 	 r?   c                b     | j                   |f|||d|D cg c]  \  }}|	 c}}S c c}}w )ak  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Name of the index to search. Overrides the default
                index_name.
            kwargs: Additional keyword arguments to pass to the search method.


        Returns:
            List of Documents most similar to the query vector.
        rt   )rw   )r3   r5   ru   rv   r7   rf   docrg   s           r   similarity_search_by_vectorz%Aerospike.similarity_search_by_vectorf  sQ    2 F$EE+%	
 	
Q 	
 		
 	
s   +c                f     | j                   |f|||d|}|D cg c]  \  }}|	 c}}S c c}}w )a*  Return aerospike documents most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Optional name of the index to search. Overrides the
                default index_name.

        Returns:
            List of Documents most similar to the query and score for each
        rt   )ry   )	r3   rx   ru   rv   r7   rf   docs_and_scoresr   rg   s	            r   similarity_searchzAerospike.similarity_search  sG    * <$;;
m

NT
 #22Q222s   -c                   | j                   t        j                  k(  r| j                  S | j                   t        j                  k(  r| j
                  S | j                   t        j                  k(  r| j                  S t        d      )a  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.

        0 is dissimilar, 1 is similar.

        Aerospike's relevance_fn assume euclidean and dot product embeddings are
        normalized to unit norm.
        rN   )	r2   r   rP   _cosine_relevance_score_fnrQ   %_max_inner_product_relevance_score_fnrS   _euclidean_relevance_score_fnr'   rA   s    r   _select_relevance_score_fnz$Aerospike._select_relevance_score_fn  sw     ""&6&=&==222$$(8(D(DD===$$(8(K(KK555V r?   c                    d| dz  z
  S )zgAerospike returns cosine distance scores between [0,2]

        0 is dissimilar, 1 is similar.
              rX   )r   s    r   r   z$Aerospike._cosine_relevance_score_fn  s     EAIr?   c                   |r| j                   |vr| j                   g|z   } | j                  |f|||d|}t        t        j                  |gt        j
                        |D 	cg c]  }	|	j                  | j                       c}	||      }
|r=| j                   |v r/|
D ]*  }||   j                  j                  | j                          , |
D cg c]  }||   	 c}S c c}	w c c}w )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree of
                diversity among the results with 0 corresponding to maximum
                diversity and 1 to minimum diversity. Defaults to 0.5.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Optional name of the index to search. Overrides the
                default index_name.
        Returns:
            List of Documents selected by maximal marginal relevance.
        rt   )dtype)ru   lambda_mult)r,   r   r   nparrayfloat32rm   r   )r3   r5   ru   fetch_kr   rv   r7   rf   r   r   mmr_selectedri   s               r   'max_marginal_relevance_search_by_vectorz1Aerospike.max_marginal_relevance_search_by_vector  s    < T--]B!--.>M/t//
'!	

 
 2HHi[

37;<S\\$**+<#	
 T-->! 7Q  $$T%5%567 "..AQ.. = /s   & C"
C'c                V    | j                  |      } | j                  ||||f||d|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            index_name: Name of the index to search.
        Returns:
            List of Documents selected by maximal marginal relevance.
        )rv   r7   )rL   r   )	r3   rx   ru   r   r   rv   r7   rf   r5   s	            r   max_marginal_relevance_searchz'Aerospike.max_marginal_relevance_search  sL    6 %%e,	;t;;	

 (!
 
 	
r?   c
                T     | |||fi |
} |j                   |f||||d|	xs i  |S )a  
        This is a user friendly interface that:
            1. Embeds text.
            2. Converts the texts into documents.
            3. Adds the documents to a provided Aerospike index

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Aerospike
                from langchain_openai import OpenAIEmbeddings
                from aerospike_vector_search import Client, HostPort

                client = Client(seeds=HostPort(host="localhost", port=5000))
                aerospike = Aerospike.from_texts(
                    ["foo", "bar", "baz"],
                    embedder,
                    client,
                    "namespace",
                    index_name="index",
                    vector_key="vector",
                    distance_strategy=MODEL_DISTANCE_CALC,
                )
        )rb   rc   r7   rd   )ro   )clsrF   r5   rb   r4   r6   r7   rc   embeddings_chunk_sizeclient_kwargsrf   r=   s               r   
from_textszAerospike.from_texts  s_    P 
 	
	 			
!!6	
 "	
 r?   )r4   r   r5   zUnion[Embeddings, Callable]r6   rY   r7   Optional[str]r8   rY   r9   rY   r:   rY   r;   r   r<   z7Optional[Union[DistanceStrategy, VectorDistanceMetric]])returnzOptional[Embeddings])rF   Iterable[str]r   zList[List[float]])rK   rY   r   List[float])r<   z-Union[VectorDistanceMetric, DistanceStrategy]r   r   )NNN  NT)rF   r   rb   Optional[List[dict]]rc   Optional[List[str]]r;   r   rd   intr7   r   re   boolrf   r   r   	List[str])NN)rc   r   r;   r   rf   r   r   zOptional[bool])   NN)rx   rY   ru   r   rv   r   r7   r   rf   r   r   List[Tuple[Document, float]])r5   r   ru   r   rv   r   r7   r   rf   r   r   r   )r5   r   ru   r   rv   r   r7   r   rf   r   r   List[Document])rx   rY   ru   r   rv   r   r7   r   rf   r   r   r   )r   zCallable[[float], float])r   floatr   r   )r      g      ?NN)r5   r   ru   r   r   r   r   r   rv   r   r7   r   rf   r   r   r   )rx   rY   ru   r   r   r   r   r   rv   r   r7   r   rf   r   r   r   )NNtestNNr   N)rF   r   r5   r   rb   r   r4   r   r6   rY   r7   r   rc   r   r   r   r   zOptional[dict]rf   r   r   r   )__name__
__module____qualname____doc__r   rS   r>   propertyrB   rH   rL   staticmethodr1   ro   rr   ry   rw   r   r   r   r   r   r   classmethodr   rX   r?   r   r   r   1   s"    %)#"& //4T4T /4T 	4T
 "4T 4T 4T 4T  4T
4Tl  3% 
H
	
 
: +/#'"&$($(#NN (N !	N
  N "N "N N N 
Nd $("&    	
 
F -1$(

 
 +	

 "
 
 
&
D -1$(:: : +	:
 ": : 
&:~ -1$( 
 
  
 +	 

 " 
  
 
 
J -1$(33 3 +	3
 "3 3 
342    -1$(3/3/ 3/ 	3/
 3/ +3/ "3/ 3/ 
3/p  -1$($
$
 $
 	$

 $
 +$
 "$
 $
 
$
L 
 +/$(#'%)(,66 6 (	6
 6 6 "6 !6  #6 &6 6 
6 6r?   )r   r   )%
__future__r   loggingrZ   r%   typingr   r   r   r   r   r	   r
   r   r   numpyr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.vectorstoresr   &langchain_community.vectorstores.utilsr   r   r   r   rO   r   r   	getLoggerr   r   r   r   r   rX   r?   r   <module>r      sq    "   
 
 
  - 0 3
 .L			8	$ v[)d dr?   