
    h^0                        d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ erd dlmZ d dlmZ  G d	 d
ee      Z edeeef         Z ej4                  e      ZdZ G d de      Zy)    )annotationsN)Enum)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTypeVarUnion)Document)VectorStore)
Embeddings)
Collectionc                       e Zd ZdZdZ	 dZ	 dZy)DocumentDBSimilarityTypez)DocumentDB Similarity Type as enumerator.cosine
dotProduct	euclideanN)__name__
__module____qualname____doc__COSDOTEUC     i/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/documentdb.pyr   r      s    3
C
C
Cr   r   DocumentDBDocumentType)bound   c                  l   e Zd ZdZdddd	 	 	 	 	 	 	 	 	 ddZedd       ZddZe	 	 	 	 	 	 	 	 	 	 dd	       Z	dd
Z
ddZdej                  ddf	 	 	 	 	 	 	 	 	 ddZ	 d 	 	 	 	 	 	 	 d!dZd"dZe	 	 d#	 	 	 	 	 	 	 	 	 	 	 d$d       Zd d%dZd d&dZ	 	 	 d'	 	 	 	 	 	 	 	 	 d(dZ	 	 d)dd	 	 	 	 	 	 	 	 	 	 	 d*dZy)+DocumentDBVectorSearcha  `Amazon DocumentDB (with MongoDB compatibility)` vector store.
    Please refer to the official Vector Search documentation for more details:
    https://docs.aws.amazon.com/documentdb/latest/developerguide/vector-search.html

    To use, you should have both:
    - the ``pymongo`` python package installed
    - a connection string and credentials associated with a DocumentDB cluster

    Example:
        . code-block:: python

            from langchain_community.vectorstores import DocumentDBVectorSearch
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            from pymongo import MongoClient

            mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
            collection = mongo_client["<db_name>"]["<collection_name>"]
            embeddings = OpenAIEmbeddings()
            vectorstore = DocumentDBVectorSearch(collection, embeddings)
    vectorSearchIndextextContentvectorContent)
index_nametext_keyembedding_keyc               t    || _         || _        || _        || _        || _        t
        j                  | _        y)a  Constructor for DocumentDBVectorSearch

        Args:
            collection: MongoDB collection to add the texts to.
            embedding: Text embedding model to use.
            index_name: Name of the Vector Search index.
            text_key: MongoDB field that will contain the text
                for each document.
            embedding_key: MongoDB field that will contain the embedding
                for each document.
        N)_collection
_embedding_index_name	_text_key_embedding_keyr   r   _similarity_type)self
collection	embeddingr)   r*   r+   s         r    __init__zDocumentDBVectorSearch.__init__B   s8    ( &#%!+ 8 < <r   c                    | j                   S N)r.   r3   s    r    
embeddingsz!DocumentDBVectorSearch.embeddings]   s    r   c                    | j                   S )zUReturns the index name

        Returns:
            Returns the index name

        )r/   r9   s    r    get_index_namez%DocumentDBVectorSearch.get_index_namea   s     r   c                    	 ddl m}  ||      }|j                  d      \  }}||   |   }	 | |	|fi |S # t        $ r t        d      w xY w)a  Creates an Instance of DocumentDBVectorSearch from a Connection String

        Args:
            connection_string: The DocumentDB cluster endpoint connection string
            namespace: The namespace (database.collection)
            embedding: The embedding utility
            **kwargs: Dynamic keyword arguments

        Returns:
            an instance of the vector store

        r   )MongoClientzGCould not import pymongo, please install it with `pip install pymongo`..)pymongor>   ImportErrorsplit)
clsconnection_string	namespacer5   kwargsr>   clientdb_namecollection_namer4   s
             r    from_connection_stringz-DocumentDBVectorSearch.from_connection_stringj   sm    (	+ **;<#,??3#7 G__5
:y3F33  	) 	s	   6 Ac                    | j                   j                         }| j                  }|D ]  }|j                  d      }||k(  s y y)zVerifies if the specified index name during instance
            construction exists on the collection

        Returns:
          Returns True on success and False if no such index exists
            on the collection
        nameTF)r-   list_indexesr/   pop)r3   cursorr)   rescurrent_index_names        r    index_existsz#DocumentDBVectorSearch.index_exists   sP     !!..0%%
 	C!$!Z/	
 r   c                p    | j                         r&| j                  j                  | j                         yy)zEDeletes the index specified during instance construction if it existsN)rR   r-   
drop_indexr/   r9   s    r    delete_indexz#DocumentDBVectorSearch.delete_index   s-    ''(8(89 r   i      @   c           	         || _         | j                  j                  | j                  | j                  did||||ddgd}| j                  j
                  }|j                  |      }|S )a  Creates an index using the index name specified at
            instance construction

        Args:
            dimensions: Number of dimensions for vector similarity.
                The maximum number of supported dimensions is 2000

            similarity: Similarity algorithm to use with the HNSW index.
                 Possible options are:
                    - DocumentDBSimilarityType.COS (cosine distance),
                    - DocumentDBSimilarityType.EUC (Euclidean distance), and
                    - DocumentDBSimilarityType.DOT (dot product).

            m: Specifies the max number of connections for an HNSW index.
                Large impact on memory consumption.

            ef_construction: Specifies the size of the dynamic candidate list
                for constructing the graph for HNSW index. Higher values lead
                to more accurate results but slower indexing speed.


        Returns:
            An object describing the created index

        vectorhnsw)type
similarity
dimensionsmefConstruction)rL   keyvectorOptions)createIndexesindexes)r2   r-   rL   r/   r1   databasecommand)r3   r]   r\   r^   ef_constructioncreate_index_commandscurrent_databasecreate_index_responsess           r    create_indexz#DocumentDBVectorSearch.create_index   s    @ !+ "--22 !,, //: &&0&0*9&
!
$  ++44 2B1I1I!2
 &%r   Nc                   |j                  dt              }|xs	 d |D        }g }g }g }t        t        ||            D ][  \  }	\  }
}|j	                  |
       |j	                  |       |	dz   |z  dk(  s7|j                  | j                  ||             g }g }] |r!|j                  | j                  ||             |S )N
batch_sizec              3      K   | ]  }i   y wr8   r   ).0_s     r    	<genexpr>z3DocumentDBVectorSearch.add_texts.<locals>.<genexpr>   s     :M!2:Ms      r   )getDEFAULT_INSERT_BATCH_SIZE	enumeratezipappendextend_insert_texts)r3   texts	metadatasrF   rl   
_metadatastexts_batchmetadatas_batch
result_idsitextmetadatas               r    	add_textsz DocumentDBVectorSearch.add_texts   s     ZZ.GH
-6-M:Mu:M

#,S
-C#D 	%Aht$""8,A#q(!!$"4"4[/"RS "$	% d00oNOr   c           	        |sg S | j                   j                  |      }t        |||      D cg c]"  \  }}}| j                  || j                  |i|$ }}}}| j
                  j                  |      }|j                  S c c}}}w )zUsed to Load Documents into the collection

        Args:
            texts: The list of documents strings to load
            metadatas: The list of metadata objects associated with each document

        Returns:

        )r.   embed_documentsru   r0   r1   r-   insert_manyinserted_ids)	r3   ry   rz   r:   tr^   r5   	to_insertinsert_results	            r    rx   z$DocumentDBVectorSearch._insert_texts   s     I __44U;
 $'ui#D
 
1i ^^Q 3 3YD!D
	 

 ((44Y?)))
s   'Bc                Z    |t        d       | ||fi |}|j                  ||       |S )Nz*Must provide 'collection' named parameter.)rz   )
ValueErrorr   )rC   ry   r5   rz   r4   rF   vectorstores          r    
from_textsz!DocumentDBVectorSearch.from_texts  s@     IJJ*i:6:ey9r   c                N    |t        d      |D ]  }| j                  |        y)Nz#No document ids provided to delete.T)r   delete_document_by_id)r3   idsrF   document_ids       r    deletezDocumentDBVectorSearch.delete  s3    ;BCC 	4K&&{3	4r   c                    	 ddl m} |t        d      | j                  j                  d ||      i       y# t        $ r}t        d      |d}~ww xY w)zjRemoves a Specific Document by Id

        Args:
            document_id: The document identifier
        r   )ObjectIdz>Unable to import bson, please install with `pip install bson`.Nz"No document id provided to delete._id)bson.objectidr   rA   r   r-   
delete_one)r3   r   r   es       r    r   z,DocumentDBVectorSearch.delete_document_by_id&  sa    	.
 ABB##UH[,A$BC  	P	s   9 	AAAc           	        |si }d|idd|| j                   | j                  ||diig}| j                  j                  |      }g }|D ]9  }|j	                  | j
                        }	|j                  t        |	|             ; |S )a   Returns a list of documents.

        Args:
            embeddings: The query vector
            k: the number of documents to return
            ef_search: Specifies the size of the dynamic candidate list
                that HNSW index uses during search. A higher value of
                efSearch provides better recall at cost of speed.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
        Returns:
            A list of documents closest to the query vector
        z$matchz$searchvectorSearch)rY   pathr\   kefSearch)page_contentr   )r1   r2   r-   	aggregaterN   r0   rv   r   )
r3   r:   r   	ef_searchfilterpipelinerO   docsrP   r   s
             r     _similarity_search_without_scorez7DocumentDBVectorSearch._similarity_search_without_score7  s    * Fv"", $ 3 3&*&;&;$-%
*
 !!++H5 	CC774>>*DKKdSAB	C r   )r   c                   | j                   j                  |      }| j                  ||||      }|D cg c]  }| c}S c c}w )N)r:   r   r   r   )r.   embed_queryr   )	r3   queryr   r   r   rF   r:   r   docs	            r    similarity_searchz(DocumentDBVectorSearch.similarity_searchg  sL     __007
44!Q)F 5 
  $$$$$s   	A)
r4   z"Collection[DocumentDBDocumentType]r5   r   r)   strr*   r   r+   r   )returnr   )r   r   )
rD   r   rE   r   r5   r   rF   r   r   r%   )r   bool)r   None)
r]   intr\   r   r^   r   rf   r   r   zdict[str, Any]r8   )ry   zIterable[str]rz   zOptional[List[Dict[str, Any]]]rF   r   r   r
   )ry   	List[str]rz   zList[Dict[str, Any]]r   r
   )NN)ry   r   r5   r   rz   zOptional[List[dict]]r4   z,Optional[Collection[DocumentDBDocumentType]]rF   r   r   r%   )r   zOptional[List[str]]rF   r   r   zOptional[bool])r   zOptional[str]r   r   )   (   N)
r:   zList[float]r   r   r   r   r   Optional[Dict[str, Any]]r   List[Document])r   r   )r   r   r   r   r   r   r   r   rF   r   r   r   )r   r   r   r   r6   propertyr:   r<   classmethodrJ   rR   rU   r   r   rj   r   rx   r   r   r   r   r   r   r   r    r%   r%   ,   s!   4 .%,=6= =
 = = =6    44 4 	4
 4 
 4 4>$: /G/K/K!<&<& -<& 	<&
 <& 
<&B 59 2 	
 
,*0 
 +/CG  (	
 A  
  D( +/.. . 	.
 ). 
.f 	% ,0%% % 	% )% % 
%r   r%   )
__future__r   loggingenumr   typingr   r   r   r   r	   r
   r   r   r   langchain_core.documentsr   langchain_core.vectorstoresr   langchain_core.embeddingsr   pymongo.collectionr   r   r   r!   	getLoggerr   loggerrs   r%   r   r   r    <module>r      sz    "  
 
 
 . 34-sD  !!9c3hP 			8	$ H%[ H%r   