
    hQ                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ ddZ ddZ! G d de      Z"y)    )annotationsN)Path)AnyCallableDictIterableListOptionalTuple)Document)
Embeddingsguard_import)VectorStore)AddableMixinDocstore)InMemoryDocstore)DistanceStrategyc                z    | t        j                  t         j                  j                  | dd      dd      z  } | S )z!Normalize vectors to unit length.T)axiskeepdimsg-q=N)npcliplinalgnorm)xs    d/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/scann.py	normalizer      s1    T:E4	HHAH    c                     t        d      S )z=
    Import `scann` if available, otherwise raise error.
    scannr    r    r   dependable_scann_importr$      s       r    c                  X   e Zd ZdZddej
                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 d	 	 	 	 	 	 	 	 	 ddZ	 	 d	 	 	 	 	 	 	 	 	 ddZ	dddZ
	 	 	 d	 	 	 	 	 	 	 	 	 	 	 dd	Z	 	 	 d	 	 	 	 	 	 	 	 	 	 	 dd
Z	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d dZe	 	 	 d!	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d"d       Ze	 	 d	 	 	 	 	 	 	 	 	 	 	 d#d       Ze	 	 d	 	 	 	 	 	 	 	 	 	 	 d$d       Zd%d&dZe	 d%dd	 	 	 	 	 	 	 	 	 	 	 d'd       Zd(dZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZy))ScaNNa  `ScaNN` vector store.

    To use, you should have the ``scann`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceEmbeddings
            from langchain_community.vectorstores import ScaNN

            model_name = "sentence-transformers/all-mpnet-base-v2"
            db = ScaNN.from_texts(
                ['foo', 'bar', 'barz', 'qux'],
                HuggingFaceEmbeddings(model_name=model_name))
            db.similarity_search('foo?', k=1)
    NFc	                t    || _         || _        || _        || _        || _        || _        || _        || _        y)z%Initialize with necessary components.N)	embeddingindexdocstoreindex_to_docstore_iddistance_strategyoverride_relevance_score_fn_normalize_L2_scann_config)	selfr(   r)   r*   r+   relevance_score_fnnormalize_L2r,   scann_configs	            r   __init__zScaNN.__init__3   sA     #
 $8!!2+=())r    c                ~    t        | j                  t              st        d| j                   d      t	        d      )NSIf trying to add texts, the underlying docstore should support adding items, which 	 does notz(Updates are not available in ScaNN, yet.)
isinstancer*   r   
ValueErrorNotImplementedError)r0   texts
embeddings	metadatasidskwargss         r   __addzScaNN.__addH   sC     $--6''+}}oY@  ""LMMr    c                x    | j                   j                  t        |            } | j                  ||f||d|S )al  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        r=   r>   )r(   embed_documentslist_ScaNN__add)r0   r;   r=   r>   r?   r<   s         r   	add_textszScaNN.add_textsW   s;    $ ^^33DK@
tzz%TycTVTTr    c                    t        | j                  t              st        d| j                   d      t	        | \  }} | j
                  ||f||d|S )a  Run more texts through the embeddings and add to the vectorstore.

        Args:
            text_embeddings: Iterable pairs of string and embedding to
                add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        r6   r7   rB   )r8   r*   r   r9   ziprE   )r0   text_embeddingsr=   r>   r?   r;   r<   s          r   add_embeddingszScaNN.add_embeddingsl   se    $ $--6''+}}oY@ 
  1ztzz%TycTVTTr    c                    t        d      )a3  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments that subclasses might use.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        z*Deletions are not available in ScaNN, yet.)r:   )r0   r>   r?   s      r   deletezScaNN.delete   s     ""NOOr    c           
        t        j                  |gt         j                        }| j                  rt	        |      }| j
                  j                  |||n|      \  }}g }	t        |d         D ]  \  }
}|dk(  r| j                  |   }| j                  j                  |      t        t              st        d| d       |s|j                         D ci c]  \  }}|t        |t              s|gn| }}}t!        fd|j                         D              s|	j#                  |d   |
   f       |	j#                  |d   |
   f        |j%                  d      }|k| j&                  t(        j*                  t(        j,                  fv rt.        j0                  nt.        j2                  }|	D cg c]  \  }} |||      r||f }	}}|	d| S c c}}w c c}}w )	a  Return docs most similar to query.

        Args:
            embedding: Embedding vector to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.
            **kwargs: kwargs to be passed to similarity search. Can include:
                score_threshold: Optional, a floating point value between 0 to 1 to
                    filter the resulting set of retrieved docs

        Returns:
            List of documents most similar to the query text and L2 distance
            in float for each. Lower score represents more similarity.
        dtypeNr   r   zCould not find document for id z, got c              3  ^   K   | ]$  \  }}j                   j                  |      |v  & y wN)metadataget).0keyvaluedocs      r   	<genexpr>z?ScaNN.similarity_search_with_score_by_vector.<locals>.<genexpr>   s)     W*#us||'',5Ws   *-score_threshold)r   arrayfloat32r.   r   r)   search_batched	enumerater+   r*   searchr8   r   r9   itemsrD   allappendrS   r,   r   MAX_INNER_PRODUCTJACCARDoperatorgele)r0   r(   kfilterfetch_kr?   vectorindicesscoresdocsji_idrU   rV   rY   cmprW   
similaritys                    ` r   &similarity_search_with_score_by_vectorz,ScaNN.similarity_search_with_score_by_vector   s   0 9+RZZ8v&F**33AW
 gaj) 	1DAqBw++A.C--&&s+Cc8, #B3%vcU!STT! '-lln"U 
5$(?%UJ  WWWKKfQil 34S&)A,/0!	1$ !**%67& ))$668H8P8PQR  [[	  (,#Cz?3 j!D 
 BQx-"s    G=G"c                j    | j                   j                  |      } | j                  ||f||d|}|S )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of documents most similar to the query text with
            L2 distance in float. Lower score represents more similarity.
        rh   ri   )r(   embed_queryrs   )r0   queryrg   rh   ri   r?   r(   rm   s           r   similarity_search_with_scorez"ScaNN.similarity_search_with_score   sN    * NN..u5	:t::
 	

 
 r    c                f     | j                   ||f||d|}|D cg c]  \  }}|	 c}}S c c}}w )a  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the embedding.
        ru   )rs   )	r0   r(   rg   rh   ri   r?   docs_and_scoresrW   _s	            r   similarity_search_by_vectorz!ScaNN.similarity_search_by_vector   sN    ( F$EE
 	

 
 #22Q222   -c                f     | j                   ||f||d|}|D cg c]  \  }}|	 c}}S c c}}w )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the query.
        ru   )rx   )	r0   rw   rg   rh   ri   r?   rz   rW   r{   s	            r   similarity_searchzScaNN.similarity_search  sG    ( <$;;1
#W
8>
 #22Q222r}   c                   t        d      }|j                  dt        j                        }	|j                  dd       }
t	        j
                  |t        j                        }|rt        |      }|
|j                  j                  ||
      }n|	t        j                  k(  r:|j                  j                  |dd      j                         j                         }n9|j                  j                  |dd      j                         j                         }g }|*|D cg c]  }t        t        j                                ! }}t#        |      D ]*  \  }}|r||   ni }|j%                  t'        ||             , t)        t#        |            }t+        |      t+        |      k7  r#t-        t+        |       d	t+        |       d
      t/        t)        t1        |j3                         |                  } | ||||fd|i|S c c}w )Nr"   r,   r3   rN      dot_product
squared_l2)page_contentrR   z ids provided for z, documents. Each document should have an id.r2   )r   rS   r   EUCLIDEAN_DISTANCEr   rZ   r[   r   scann_ops_pybindcreate_searcherrb   builderscore_brute_forcebuildstruuiduuid4r]   ra   r   dictlen	Exceptionr   rH   values)clsr;   r<   r(   r=   r>   r2   r?   r"   r,   r3   rj   r)   	documentsr{   ro   textrR   index_to_idr*   s                       r   __fromzScaNN.__from+  s    W%"JJ!1!D!D
 zz.$7*BJJ7v&F#**::6<PE $4$F$FF**2261mL&&(UW  **2261lK&&(UW 
 	;.343tzz|$4C4 ' 	MGAt'0y|bHX4(KL	M 9S>*{s9~-{#$$6s9~6F G4 4 
 $D[-?-?-A9)M$NO	

 &
 
 	
 5s   $Hc                T    |j                  |      } | j                  |||f||d|S )aN  Construct ScaNN wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the ScaNN database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import ScaNN
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                scann = ScaNN.from_texts(texts, embeddings)
        rB   )rC   _ScaNN__from)r   r;   r(   r=   r>   r?   r<   s          r   
from_textszScaNN.from_textsg  sG    4 ..u5
szz
  
 
 	
r    c                    |D cg c]  }|d   	 }}|D cg c]  }|d   	 }} | j                   |||f||d|S c c}w c c}w )a  Construct ScaNN wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the ScaNN database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import ScaNN
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = list(zip(texts, text_embeddings))
                scann = ScaNN.from_embeddings(text_embedding_pairs, embeddings)
        r   r   rB   )r   )	r   rI   r(   r=   r>   r?   tr;   r<   s	            r   from_embeddingszScaNN.from_embeddings  sm    8  //!1//$34qad4
4szz
  
 
 	
 04s	   =Ac                r   t        |      }|dj                  |      z  }|j                  dd       | j                  j	                  t        |             t        |dj                  |      z  d      5 }t        j                  | j                  | j                  f|       ddd       y# 1 sw Y   yxY w)zSave ScaNN index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
        {index_name}.scann
index_nameTexist_okparents{index_name}.pklwbN)r   formatmkdirr)   	serializer   openpickledumpr*   r+   )r0   folder_pathr   path
scann_pathfs         r   
save_localzScaNN.save_local  s     K 077:7NN
$5 	

S_- $+22j2II4P 	GTUKK(A(ABAF	G 	G 	Gs   7-B--B6)allow_dangerous_deserializationc                  |st        d      t        |      }|dj                  |      z  }|j                  dd       t	        d      }|j
                  j                  t        |            }	t        |dj                  |      z  d      5 }
t        j                  |
      \  }}d	d	d	        | ||	fi |S # 1 sw Y   xY w)
a  Load ScaNN index, docstore, and index_to_docstore_id from disk.

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embedding: Embeddings to use when generating queries
            index_name: for saving with a specific index file name
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        aB  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   Tr   r"   r   rbN)r9   r   r   r   r   r   load_searcherr   r   r   load)r   r   r(   r   r   r?   r   r   r"   r)   r   r*   r+   s                r   
load_localzScaNN.load_local  s    . /	"  K 077:7NN
$5W%&&44S_E $+22j2II4P 	TU $	 9eX/CNvNN	 	s   B<<Cc                    | j                   | j                   S | j                  t        j                  k(  r| j                  S | j                  t        j
                  k(  r| j                  S t        d      )a8  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        zJUnknown distance strategy, must be cosine, max_inner_product, or euclidean)r-   r,   r   rb   %_max_inner_product_relevance_score_fnr   _euclidean_relevance_score_fnr9   )r0   s    r   _select_relevance_score_fnz ScaNN._select_relevance_score_fn  sr     ++7333 !!%5%G%GG===##'7'J'JJ555  r    c                   |j                  dd      }| j                         }|t        d       | j                  |f|||d|}|D 	
cg c]  \  }	}
|	 ||
      f }}	}
||D 	cg c]  \  }	}||k\  r|	|f }}	}|S c c}
}	w c c}}	w )z?Return docs and their similarity scores on a scale from 0 to 1.rY   NzLnormalize_score_fn must be provided to ScaNN constructor to normalize scores)rg   rh   ri   )popr   r9   rx   )r0   rw   rg   rh   ri   r?   rY   r1   rz   rW   scoredocs_and_rel_scoresrr   s                r   (_similarity_search_with_relevance_scoresz.ScaNN._similarity_search_with_relevance_scores  s     !**%6=!<<>%9  <$;;
	

 
 @O
1;eS$U+,
 
 & (;##C0 j!# #
 #"
#s   B*B)r(   r   r)   r   r*   r   r+   zDict[int, str]r1   z"Optional[Callable[[float], float]]r2   boolr,   r   r3   zOptional[str])NN)r;   Iterable[str]r<   zIterable[List[float]]r=   Optional[List[dict]]r>   Optional[List[str]]r?   r   return	List[str])
r;   r   r=   r   r>   r   r?   r   r   r   )
rI   z!Iterable[Tuple[str, List[float]]]r=   r   r>   r   r?   r   r   r   rQ   )r>   r   r?   r   r   zOptional[bool])   N   )r(   List[float]rg   intrh   Optional[Dict[str, Any]]ri   r   r?   r   r   List[Tuple[Document, float]])rw   r   rg   r   rh   r   ri   r   r?   r   r   r   )r(   r   rg   r   rh   r   ri   r   r?   r   r   List[Document])rw   r   rg   r   rh   r   ri   r   r?   r   r   r   )NNF)r;   r   r<   zList[List[float]]r(   r   r=   r   r>   r   r2   r   r?   r   r   r&   )r;   r   r(   r   r=   r   r>   r   r?   r   r   r&   )rI   zList[Tuple[str, List[float]]]r(   r   r=   r   r>   r   r?   r   r   r&   )r)   )r   r   r   r   r   None)r   r   r(   r   r   r   r   r   r?   r   r   r&   )r   zCallable[[float], float])__name__
__module____qualname____doc__r   r   r4   rE   rF   rJ   rL   rs   rx   r|   r   classmethodr   r   r   r   r   r   r   r#   r    r   r&   r&   !   sM   . BF".>.Q.Q&*** * 	*
 -* ?* * ,* $*2 +/#'NN *N (	N
 !N N 
N$ +/#'	UU (U !	U
 U 
U0 +/#'	U:U (U !	U
 U 
U8P" +/>> > )	>
 > > 
&>F +/  )	
   
&D +/33 3 )	3
 3 3 
3@ +/33 3 )	3
 3 3 
32  +/#'"9
9
 &9
 	9

 (9
 !9
 9
 9
 
9
 9
v 
 +/#'!
!
 !
 (	!

 !!
 !
 
!
 !
F 
 +/#'$
6$
 $
 (	$

 !$
 $
 
$
 $
LG$ 
 "	3O 163O3O 3O 	3O *.3O 3O 
3O 3Oj8 +/"#"# "# )	"#
 "# "# 
&"#r    r&   )r   
np.ndarrayr   r   )r   r   )#
__future__r   rd   r   r   pathlibr   typingr   r   r   r   r	   r
   r   numpyr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   !langchain_community.docstore.baser   r   &langchain_community.docstore.in_memoryr   &langchain_community.vectorstores.utilsr   r   r$   r&   r#   r    r   <module>r      sM    "     G G G  - 0 - 3 D C C!T#K T#r    