
    h F                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlm Z   e!g d      Z"dZ#ddZ$ G d de      Z%y)    )annotationsN)ConfigParser)Path)AnyCallableDictIterableListOptionalTuple)Document)
Embeddingsguard_import)VectorStore)Docstore)InMemoryDocstore)maximal_marginal_relevance)angular	euclidean	manhattanhammingdotr   c                     t        d      S )z1Import annoy if available, otherwise raise error.annoyr        d/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/annoy.pydependable_annoy_importr      s      r   c                  B   e Zd ZdZ	 	 	 	 	 	 	 	 	 	 ddZedd       Z	 d	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZ	 d	 	 	 	 	 	 	 ddZ		 d	 	 	 	 	 	 	 dd	Z
	 d	 	 	 	 	 	 	 d d
Z	 d	 	 	 	 	 	 	 	 	 d!dZ	 d	 	 	 	 	 	 	 	 	 d"dZ	 d	 	 	 	 	 	 	 	 	 d#dZ	 	 	 d$	 	 	 	 	 	 	 	 	 	 	 d%dZ	 	 	 d$	 	 	 	 	 	 	 	 	 	 	 d&dZededdf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd       Zededdf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d(d       Zededdf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d)d       Zd*d+dZedd	 	 	 	 	 	 	 d,d       Zy)-Annoya  `Annoy` vector store.

    To use, you should have the ``annoy`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Annoy
            db = Annoy(embedding_function, index, docstore, index_to_docstore_id)

    c                J    || _         || _        || _        || _        || _        y)z%Initialize with necessary components.N)embedding_functionindexmetricdocstoreindex_to_docstore_id)selfr#   r$   r%   r&   r'   s         r   __init__zAnnoy.__init__*   s)     #5
 $8!r   c                     y Nr   )r(   s    r   
embeddingszAnnoy.embeddings9   s     r   Nc                    t        d      )Nz=Annoy does not allow to add new data once the index is build.)NotImplementedError)r(   texts	metadataskwargss       r   	add_textszAnnoy.add_texts>   s     "K
 	
r   c                    g }t        ||      D ]c  \  }}| j                  |   }| j                  j                  |      }t	        |t
              st        d| d|       |j                  ||f       e |S )a  Turns annoy results into a list of documents and scores.

        Args:
            idxs: List of indices of the documents in the index.
            dists: List of distances of the documents in the index.
        Returns:
            List of Documents and scores.
        Could not find document for id , got )zipr'   r&   search
isinstancer   
ValueErrorappend)r(   idxsdistsdocsidxdist_iddocs           r   process_index_resultszAnnoy.process_index_resultsH   s     T5) 	%IC++C0C--&&s+Cc8, #B3%vcU!STTKKd$	% r   c                j    | j                   j                  |||d      \  }}| j                  ||      S a}  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided
        Returns:
            List of Documents most similar to the query and score for each
        Tsearch_kinclude_distances)r$   get_nns_by_vectorrB   )r(   	embeddingkrG   r;   r<   s         r   &similarity_search_with_score_by_vectorz,Annoy.similarity_search_with_score_by_vector\   s?     jj22q8t 3 
e ))$66r   c                j    | j                   j                  |||d      \  }}| j                  ||      S rE   )r$   get_nns_by_itemrB   )r(   docstore_indexrK   rG   r;   r<   s         r   %similarity_search_with_score_by_indexz+Annoy.similarity_search_with_score_by_indexn   s?     jj00AD 1 
e ))$66r   c                N    | j                  |      }| j                  |||      }|S )a~  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the query and score for each
        )r#   rL   )r(   queryrK   rG   rJ   r=   s         r   similarity_search_with_scorez"Annoy.similarity_search_with_score   s-     ++E2	::9aRr   c                ^    | j                  |||      }|D cg c]  \  }}|	 c}}S c c}}w )a  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the embedding.
        )rL   )r(   rJ   rK   rG   r1   docs_and_scoresrA   _s           r   similarity_search_by_vectorz!Annoy.similarity_search_by_vector   s5     EEq(
 #22Q222   )c                ^    | j                  |||      }|D cg c]  \  }}|	 c}}S c c}}w )az  Return docs most similar to docstore_index.

        Args:
            docstore_index: Index of document in docstore
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the embedding.
        )rP   )r(   rO   rK   rG   r1   rU   rA   rV   s           r   similarity_search_by_indexz Annoy.similarity_search_by_index   s5     DDAx
 #22Q222rX   c                ^    | j                  |||      }|D cg c]  \  }}|	 c}}S c c}}w )al  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the query.
        )rS   )r(   rR   rK   rG   r1   rU   rA   rV   s           r   similarity_searchzAnnoy.similarity_search   s0     ;;E1hO"12Q222rX   c                   | j                   j                  ||dd      }|D cg c]  }| j                   j                  |       }}t        t	        j
                  |gt        j                        |||      }	|	D cg c]  }|dk7  s	||    }
}g }|
D ]^  }| j                  |   }| j                  j                  |      }t        |t              st        d| d|       |j                  |       ` |S c c}w c c}w )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            k: Number of Documents to return. Defaults to 4.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.

        Returns:
            List of Documents selected by maximal marginal relevance.
        rC   FrF   )dtype)rK   lambda_multr4   r5   )r$   rI   get_item_vectorr   nparrayfloat32r'   r&   r7   r8   r   r9   r:   )r(   rJ   rK   fetch_kr_   r1   r;   ir,   mmr_selectedselected_indicesr=   r@   rA   s                 r   'max_marginal_relevance_search_by_vectorz-Annoy.max_marginal_relevance_search_by_vector   s   2 zz++wu , 
 >BBdjj003B
B1HHi[

3#	
 .:EQ"WDGEE! 	A++A.C--&&s+Cc8, #B3%vcU!STTKK	 # C Fs   "C:?
C?
C?c                R    | j                  |      }| j                  ||||      }|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        )r_   )r#   rh   )r(   rR   rK   rd   r_   r1   rJ   r=   s           r   max_marginal_relevance_searchz#Annoy.max_marginal_relevance_search   s9    0 ++E2	;;q'{ < 
 r   d   c                   |t         vrt        d| dt        t                      t        d      }	|st        d      t	        |d         }
|	j                  |
|      }t        |      D ]  \  }}|j                  ||        |j                  ||       g }t        |      D ]*  \  }}|r||   ni }|j                  t        ||             , t        t	        |            D ci c]   }|t        t        j                               " }}t        t        |      D ci c]  \  }}||   | c}}      } | |j                   ||||      S c c}w c c}}w )	NzUnsupported distance metric: z. Expected one of r   z/embeddings must be provided to build AnnoyIndexr   r%   )n_jobs)page_contentmetadata)INDEX_METRICSr9   listr   len
AnnoyIndex	enumerateadd_itembuildr:   r   rangestruuiduuid4r   embed_query)clsr/   r,   rJ   r0   r%   treesrn   r1   r   fr$   re   emb	documentstextrp   index_to_idrA   r&   s                       r   __fromzAnnoy.__from  sf    &3F8 <''+M':&;=  W%NOO
1  6 2
+ 	#FAsNN1c"	#E&)	 ' 	MGAt'0y|bHX4(KL	M 6;3y>5JKq#djjl++KK#/8/CDVQ[^S D
 9((%;OO	 LDs   (%E"E
c           	     V    |j                  |      } | j                  |||||||fi |S )a  Construct Annoy wrapper from raw documents.

        Args:
            texts: List of documents to index.
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Metric to use for indexing. Defaults to "angular".
            trees: Number of trees to use for indexing. Defaults to 100.
            n_jobs: Number of jobs to use for indexing. Defaults to -1.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the Annoy database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Annoy
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                index = Annoy.from_texts(texts, embeddings)
        )embed_documents_Annoy__from)	r}   r/   rJ   r0   r%   r~   rn   r1   r,   s	            r   
from_textszAnnoy.from_texts<  s?    H ..u5
szz:y)VUF
NT
 	
r   c           	         |D cg c]  }|d   	 }	}|D cg c]  }|d   	 }
} | j                   |	|
|||||fi |S c c}w c c}w )a  Construct Annoy wrapper from embeddings.

        Args:
            text_embeddings: List of tuples of (text, embedding)
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Metric to use for indexing. Defaults to "angular".
            trees: Number of trees to use for indexing. Defaults to 100.
            n_jobs: Number of jobs to use for indexing. Defaults to -1

        This is a user friendly interface that:
            1. Creates an in memory docstore with provided embeddings
            2. Initializes the Annoy database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Annoy
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = list(zip(texts, text_embeddings))
                db = Annoy.from_embeddings(text_embedding_pairs, embeddings)
        r      )r   )r}   text_embeddingsrJ   r0   r%   r~   rn   r1   tr/   r,   s              r   from_embeddingszAnnoy.from_embeddingse  se    J  //!1//$34qad4
4szz:y)VUF
NT
 	
 04s	   >AFc                   t        |      }t        j                  |d       t               }| j                  j
                  | j                  d|d<   | j                  j                  t        |dz        |       t        |dz  d      5 }t        j                  | j                  | j                  |f|       d	d	d	       y	# 1 sw Y   y	xY w)
a  Save Annoy index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
            prefault: Whether to pre-load the index into memory.
        T)exist_ok)r   r%   ANNOYindex.annoy)prefault	index.pklwbN)r   osmakedirsr   r$   r   r%   savery   openpickledumpr&   r'   )r(   folder_pathr   pathconfig_objectfiles         r   
save_localzAnnoy.save_local  s     K 
D4($kk"
g 	

D=01HE$$d+ 	YtKK(A(A=QSWX	Y 	Y 	Ys   .CC)allow_dangerous_deserializationc                  |st        d      t        |      }t        d      }t        |dz  d      5 }t	        j
                  |      \  }}}	ddd       t        	d   d         }
|	d   d   }|j                  |
|	      }|j                  t        |d
z                | |j                  ||      S # 1 sw Y   hxY w)aR  Load Annoy index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embeddings: Embeddings to use when generating queries.
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        aB  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   rbNr   r   r%   rm   r   )
r9   r   r   r   r   loadintrt   ry   r|   )r}   r   r,   r   r   r   r   r&   r'   r   r   r%   r$   s                r   
load_localzAnnoy.load_local  s    ( /	"  K W%$$d+ 	t 	$	 g&s+,w'1  6 2

3tm+,-""E68=Q
 	
!	 	s   B44B=)
r#   r   r$   r   r%   ry   r&   r   r'   zDict[int, str])returnzOptional[Embeddings]r+   )r/   zIterable[str]r0   Optional[List[dict]]r1   r   r   	List[str])r;   z	List[int]r<   List[float]r   List[Tuple[Document, float]])   rC   )rJ   r   rK   r   rG   r   r   r   )rO   r   rK   r   rG   r   r   r   )rR   ry   rK   r   rG   r   r   r   )
rJ   r   rK   r   rG   r   r1   r   r   List[Document])
rO   r   rK   r   rG   r   r1   r   r   r   )
rR   ry   rK   r   rG   r   r1   r   r   r   )r      g      ?)rJ   r   rK   r   rd   r   r_   floatr1   r   r   r   )rR   ry   rK   r   rd   r   r_   r   r1   r   r   r   )r/   r   r,   zList[List[float]]rJ   r   r0   r   r%   ry   r~   r   rn   r   r1   r   r   r!   )r/   r   rJ   r   r0   r   r%   ry   r~   r   rn   r   r1   r   r   r!   )r   zList[Tuple[str, List[float]]]rJ   r   r0   r   r%   ry   r~   r   rn   r   r1   r   r   r!   )F)r   ry   r   boolr   None)r   ry   r,   r   r   r   r   r!   )__name__
__module____qualname____doc__r)   propertyr,   r2   rB   rL   rP   rS   rW   rZ   r\   rh   rj   classmethodDEFAULT_METRICr   r   r   r   r   r   r   r   r!   r!      s   
9$9 9 	9
 9 -9   +/

 (
 	

 

&1	%* CE7$7),7<?7	%7& @B7!7&)79<7	%7& 79 03	%& CE3$3),3<?3PS3	3( @B3!3&)39<3MP3	3( 7933 3033DG3	3(  -- - 	-
 - - 
-d    	
   
<  +/$#P#P &#P 	#P
 (#P #P #P #P #P 
#P #PJ 
 +/$&
&
 &
 (	&

 &
 &
 &
 &
 
&
 &
P 
 +/$)
6)
 )
 (	)

 )
 )
 )
 )
 
)
 )
VY(  166
6
 6

 *.6
 
6
 6
r   r!   )r   r   )&
__future__r   r   r   rz   configparserr   pathlibr   typingr   r   r   r	   r
   r   r   numpyra   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   !langchain_community.docstore.baser   &langchain_community.docstore.in_memoryr   &langchain_community.vectorstores.utilsr   	frozensetrq   r   r   r!   r   r   r   <module>r      s[    " 	   %  G G G  - 0 - 3 6 C MQR!

K 
r   