
    hh                        d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ erd dlmZ dd	d
d
dZ ej(                  e      Z G d de      Zy)    )annotationsN)TYPE_CHECKINGAnyDictIterableListOptionalTuple)Document)
Embeddings)VectorStoreHippoClient	localhost7788admin)hostportusernamepasswordc                     e Zd ZdZ	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ	 d	 	 	 	 	 ddZ	 d	 	 	 	 	 dd	Zdd
ZddZ	ddZ
	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 d dZedddedddf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d!d       Zy)"Hippoa  `Hippo` vector store.

    You need to install `hippo-api` and run Hippo.

    Please visit our official website for how to run a Hippo instance:
    https://www.transwarp.cn/starwarp

    Args:
        embedding_function (Embeddings): Function used to embed the text.
        table_name (str): Which Hippo table to use. Defaults to
            "test".
        database_name (str): Which Hippo database to use. Defaults to
            "default".
        number_of_shards (int): The number of shards for the Hippo table.Defaults to
            1.
        number_of_replicas (int): The number of replicas for the Hippo table.Defaults to
            1.
        connection_args (Optional[dict[str, any]]): The connection args used for
            this class comes in the form of a dict.
        index_params (Optional[dict]): Which index params to use. Defaults to
            IVF_FLAT.
        drop_old (Optional[bool]): Whether to drop the current collection. Defaults
            to False.
        primary_field (str): Name of the primary key field. Defaults to "pk".
        text_field (str): Name of the text field. Defaults to "text".
        vector_field (str): Name of the vector field. Defaults to "vector".

    The connection args used for this class comes in the form of a dict,
    here are a few of the options:
        host (str): The host of Hippo instance. Default at "localhost".
        port (str/int): The port of Hippo instance. Default at 7788.
        user (str): Use which user to connect to Hippo instance. If user and
            password are provided, we will add related header in every RPC call.
        password (str): Required when user is provided. The password
            corresponding to the user.

    Example:
        .. code-block:: python

        from langchain_community.vectorstores import Hippo
        from langchain_community.embeddings import OpenAIEmbeddings

        embedding = OpenAIEmbeddings()
        # Connect to a hippo instance on localhost
        vector_store = Hippo.from_documents(
            docs,
            embedding=embeddings,
            table_name="langchain_test",
            connection_args=HIPPO_CONNECTION
        )

    Raises:
        ValueError: If the hippo-api python package is not installed.
    testdefaultNFc	                h   || _         || _        || _        || _        || _        || _        d| _        d| _        d| _        g | _	        |t        }| j                  |      | _        d | _        	 | j                  j                  | j                  | j                        r2|r0| j                  j                  | j                  | j                         	 | j                  j                  | j                  | j                        r5| j                  j'                  | j                  | j                        | _        | j)                          y # t         $ r+}	t#        j$                  d| j                   d|	         d }	~	ww xY w# t         $ r+}	t#        j$                  d| j                   d|	         d }	~	ww xY w)Npktextvectorz+An error occurred while deleting the table z: z*An error occurred while getting the table )number_of_shardsnumber_of_replicasembedding_func
table_namedatabase_nameindex_params_primary_field_text_field_vector_fieldfieldsDEFAULT_HIPPO_CONNECTION_create_connection_aliashccolcheck_table_existsdelete_table	Exceptionloggingerror	get_table_get_env)
selfembedding_functionr"   r#   r   r    connection_argsr$   drop_oldes
             d/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/hippo.py__init__zHippo.__init__P   s~    !1"40$*( #!%!#"6O//@
	**4??D<N<NO$$T__d6H6HI	ww))$//4;M;MN77,,T__d>P>PQ 	!  	MM=doo=NbQRPST 		  	MM<T__<MRPQsS 		s2   -A"E A%E= 	E:&E55E:=	F1&F,,F1c                H   	 ddl m} |j                  dd      }|j                  dd      }|j                  dd      }|j                  d	d      }|W|Ud
|v r6|j	                  d
      }d
j                  |D 	cg c]	  }	|	 d|  c}	      }
n&t        |      dz   t        |      z   }
nt        d      	 t        j                  d|
 d        ||
g||      S # t        $ r}t        d      |d}~ww xY wc c}	w # t        $ r}t        j                  d       |d}~ww xY w)z*Create the connection to the Hippo server.r   r   zQUnable to import transwarp_hipp_api, please install with `pip install hippo-api`.Nr   r   r   shivar   ,:z/Missing standard address type for reuse attemptzcreate HippoClient[])r   pwdzFailed to create new connection) transwarp_hippo_api.hippo_clientr   ImportErrorgetsplitjoinstr
ValueErrorloggerinfor/   r1   )r4   r6   r   r8   r   r   r   r   hostshgiven_addresss              r9   r*   zHippo._create_connection_alias   s8   	D $''5#''5'++J@'++J@  0d{

3 #)GAQCq-)G H #D	C#d) ;NOO	KK-m_A>?xPP/  	+ 	 *H  	LL:;G	s5   C 8C75$C< 	C4#C//C4<	D!DD!c                    t         j                  d       |'t         j                  d       | j                  ||       | j                          | j	                          y )Nzinit ...zcreate collection)rH   rI   _create_collection_extract_fields_create_index)r4   
embeddings	metadatass      r9   r3   zHippo._get_env   sK     	J!KK+,##J	:    c           
        ddl m} ddlm} t	        |d         }t
        j                  d|        g }|j                   || j                  d|j                               |j                   || j                  d|j                               |j                   || j                  d|j                  d|i             |r|d   j                         D ]k  \  }}t        |t              r3t	        |      }	|j                   ||d|j                  d|	i             I|j                   ||d|j                               m t
        j                  d	|        | j                   j#                  | j$                  d|| j&                  | j(                  | j*                  
       | j                   j-                  | j$                  | j&                        | _        t
        j1                  d| j$                   d| j&                   d       y )Nr   )
HippoField)	HippoTypez[_create_collection] dim: TF	dimension)type_paramsz[_create_collection] fields: )nameauto_idr(   r#   r   r    z$[_create_collection] : create table z in z successfully)rA   rU   transwarp_hippo_api.hippo_typerV   lenrH   debugappendr%   STRINGr&   r'   FLOAT_VECTORitems
isinstancelistr+   create_tabler"   r#   r   r    r2   r,   rI   )
r4   rQ   rR   rU   rV   dimr(   keyvalue	value_dims
             r9   rN   zHippo._create_collection   s    	@< *Q- 1#78 	j!4!4dI<L<LMN 	j!1!15):J:JKL 	""&&(#.		
 'l002 L
UeT* #E
IMM"!%22)4i(@	 MM*S%9I9I"JKL 	4VH=> 	,,!22#66 	 	
 77$$T__d6H6HI OO,D1C1C0DMS	
rS   c                6   ddl m} t        | j                  |      r}| j                  j                  }t
        j                  d|        |D ]'  }| j                  j                  |j                         ) t
        j                  d| j                          yy)z,Grab the existing fields from the Collectionr   
HippoTablez[_extract_fields] schema:z04 [_extract_fields] fields:N)
rA   rk   rb   r,   schemarH   r]   r(   r^   rY   )r4   rk   rl   xs       r9   rO   zHippo._extract_fields   sv    ?dhh
+XX__FLL4VH=> +""166*+LL7}EF ,rS   c                   ddl m} t        | j                  |      r| j                  j                  | j                  | j                        j                  | j                  i       }|j                  dd      }|y| j                  j                  | j                  | j                        | j                     d   D ]0  }t        j                  d|        |d   | j                  k(  s.|c S  y)z0Return the vector index information if it existsr   rj   embedding_indexesNz[_get_index] embedding_indexes column)rA   rk   rb   r,   r+   get_table_infor"   r#   rC   rH   r]   r'   )r4   rk   
table_inforo   rm   s        r9   
_get_indexzHippo._get_index  s    ?dhh
+//!3!3c$//2&  !+/BD I (//ASASTOO%' !A LL#BCTBU!VW{d&8&88 ! rS   c                \   ddl m} ddlm}m} t        | j                  |      r| j                         u| j                         c| j                  d|j                  |j                  dd| _        | j                  j                  | j                  | j                  d   | j                  d	   | j                  d
   | j                  d          t        j                  | j                  j                  | j                  d                t        j!                  d       y|j                  |j"                  |j$                  |j&                  |j(                  d}|j*                  |j*                  |j                  |j                  d}|| j                  d
      | j                  d
<   | j                  d	   dk(  r|| j                  d	      | j                  d	<   | j                  j                  | j                  | j                  d   | j                  d	   | j                  d
          t        j                  | j                  j                  | j                  d                y| j                  d	   dk(  s| j                  d	   dk(  r|| j                  d	      | j                  d	<   | j                  j                  | j                  | j                  d   | j                  d	   | j                  d
   | j                  j-                  dd      | j                  j-                  dd             t        j                  | j                  j                  | j                  d                y| j                  d	   dk(  r|| j                  d	      | j                  d	<   | j                  j                  | j                  | j                  d   | j                  d	   | j                  d
   | j                  j-                  dd      | j                  j-                  dd      | j                  j-                  dd      | j                  j-                  d             t        j                  | j                  j                  | j                  d                y| j                  d	   dk(  r|| j                  d	      | j                  d	<   | j                  j                  | j                  | j                  d   | j                  d	   | j                  d
   | j                  j-                  d      | j                  j-                  d      | j                  j-                  d             t        j                  | j                  j                  | j                  d                yt/        d      yyy) z Create a index on the collectionr   rj   )	IndexType
MetricTypeNlangchain_auto_create
   )
index_namemetric_type
index_typenlistry   r{   rz   r|   )r|   zcreate index successfully)IVF_FLATFLATIVF_SQIVF_PQHNSW)ipIPl2L2r~   r}   r   nprobe)r|   r   r   nbits   m)r|   r   r   r   r   Mef_construction	ef_search)r   r   r   zeIndex name does not match, please enter the correct index name. (FLAT, IVF_FLAT, IVF_PQ,IVF_SQ, HNSW))rA   rk   r[   ru   rv   rb   r,   rs   r$   r   r}   create_indexr'   rH   r]   activate_indexrI   r~   r   r   r   r   rC   rG   )r4   rk   ru   rv   
index_dictmetric_dicts         r9   rP   zHippo._create_index  s   ?Hdhh
+0A0I ($$,&='1}}&/&8&8!#	)D% HH))**)),7)),7))-8"//8 *  LL//0A0A,0OP KK ;< %.$6$6 )"+"2"2"+"2"2 )"J )mm(mm(mm(mm	#K 8C))-88D%%m4 ((6&@:D --l;;)),7 -- .. --l; --l; --m<	  HH33D4E4El4ST )),7:E,,\:hF:D --l;;)),7 -- .. --l; --l; --m<"&"3"3"7"7"D#'#4#4#8#82#F .   HH33D4E4El4ST **<8HD:D --l;;)),7 -- .. --l; --l; --m<"&"3"3"7"7"D#'#4#4#8#82#F"&"3"3"7"7"C"//33C8 . 	  HH33D4E4El4ST **<8FB:D --l;;)),7 -- .. --l; --l; --m<"//33C8,0,=,=,A,ABS,T&*&7&7&;&;K&H .   HH33D4E4El4ST )D Q ) 1J+rS   c                   ddl m} |rt        d |D              rt        j	                  d       g S t        |      }t        j	                  d|        	 | j                  j                  |      }t        |      dk(  rt        j	                  d       g S t        j	                  dt        |              t        | j                  |      s| j                  ||       | j                  || j                  |i}	t        j	                  d|        t        j	                  d| j                           |O|D ]J  }
|
j#                         D ]5  \  }}|| j                   v s|	j%                  |g       j'                  |       7 L t        j	                  |	| j                            |	| j                     }t        |      }d
| j                   v r| j                   j)                  d
       t        j	                  d|        t+        d||      D ]d  }t-        ||z   |      }| j                   D cg c]
  }|	|   ||  }}	 | j                  j/                  |      }t        j1                  d|        f dgS # t        $ r2 |D cg c]  }| j                  j                  |       nc c}w }}Y Aw xY wc c}w # t2        $ r}t        j5                  d||       |d	}~ww xY w)a  
        Add text to the collection.

        Args:
            texts: An iterable that contains the text to be added.
            metadatas: An optional list of dictionaries,
            each dictionary contains the metadata associated with a text.
            timeout: Optional timeout, in seconds.
            batch_size: The number of texts inserted in each batch, defaults to 1000.
            **kwargs: Other optional parameters.

        Returns:
            A list of strings, containing the unique identifiers of the inserted texts.

        Note:
            If the collection has not yet been created,
            this method will create a new collection.
        r   rj   c              3  &   K   | ]	  }|d k(    yw) N ).0ts     r9   	<genexpr>z"Hippo.add_texts.<locals>.<genexpr>  s     3AG3s   zNothing to insert, skipping.z[add_texts] texts: z[add_texts] len_embeddings:z[add_texts] metadatas:z[add_texts] fields:Nr   z[add_texts] total_count:z05 [add_texts] insert z0Failed to insert batch starting at entity: %s/%sr   )rA   rk   allrH   r]   rc   r!   embed_documentsNotImplementedErrorembed_queryr\   rb   r,   r3   r&   r'   r(   ra   
setdefaultr^   removerangemininsert_rowsrI   r/   r1   )r4   textsrR   timeout
batch_sizekwargsrk   rQ   rm   insert_dictdrf   rg   vectorstotal_countiendinsert_listresr8   s                       r9   	add_textszHippo.add_texts  s   4 	@3U33LL78IU*5'23	M,,<<UCJ z?aLL78I23z?2CDE $((J/MM*i0 e
(
 	-i[9:*4;;-89  F"#'') FJCdkk)#..sB7>>uEFF
 	[!1!123 $D$6$67'l4;;KKt$/}=>q+z2 	Aa*nk2C:>++FQ;q>!C0FKFhh**;74SE:;	 tc # 	MFKL$--99!<LLJL	MP G  F; 	s<   I8 .J6 3J;8J3"J('J32J3;	K"KK"c           	         | j                   t        j                  d       g S  | j                  d|||||d|}|D 	cg c]  \  }}	|	 c}	}S c c}	}w )a  
        Perform a similarity search on the query string.

        Args:
            query (str): The text to search for.
            k (int, optional): The number of results to return. Default is 4.
            param (dict, optional): Specifies the search parameters for the index.
            Defaults to None.
            expr (str, optional): Filtering expression. Defaults to None.
            timeout (int, optional): Time to wait before a timeout error.
            Defaults to None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[Document]: The document results of the search.
        !No existing collection to search.)querykparamexprr   r   )r,   rH   r]   similarity_search_with_score)
r4   r   r   r   r   r   r   r   doc_s
             r9   similarity_searchzHippo.similarity_search  sd    4 88LL<=I/d// 
1Eg
IO
 #&&Q&&&s   Ac           	         | j                   t        j                  d       g S | j                  j	                  |      } | j
                  d|||||d|}|S )a  
        Performs a search on the query string and returns results with scores.

        Args:
            query (str): The text being searched.
            k (int, optional): The number of results to return.
            Default is 4.
            param (dict): Specifies the search parameters for the index.
            Default is None.
            expr (str, optional): Filtering expression. Default is None.
            timeout (int, optional): The waiting time before a timeout error.
            Default is None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[float], List[Tuple[Document, any, any]]:
        r   )	embeddingr   r   r   r   r   )r,   rH   r]   r!   r   &similarity_search_with_score_by_vector)	r4   r   r   r   r   r   r   r   rets	            r9   r   z"Hippo.similarity_search_with_score  si    6 88LL<=I ''33E:	9d99 
1Eg
QW
 
rS   c                   | j                   t        j                  d       g S | j                  dd }|j	                  | j
                         t        j                  d| j
                          t        j                  d|g        t        j                  d|        t        j                  d|        t        j                  d|        | j                   j                  | j
                  |g|||      }t        j                  d	|        | j                  d
z   }	g }
d}t        |D cg c]
  }|d   |    c} D ]  }t        ||      D ci c]  \  }}||
 }}}t        |j                  | j                        |      }t        j                  d|d   |	           |d   |	   |   }|dz  }|
j                  ||f        |
S c c}w c c}}w )a  
        Performs a search on the query string and returns results with scores.

        Args:
            embedding (List[float]): The embedding vector being searched.
            k (int, optional): The number of results to return.
            Default is 4.
            param (dict): Specifies the search parameters for the index.
            Default is None.
            expr (str, optional): Filtering expression. Default is None.
            timeout (int, optional): The waiting time before a timeout error.
            Default is None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[Tuple[Document, float]]: Resulting documents and scores.
        Nr   zsearch_field:zvectors:zoutput_fields:ztopk:zdsl:)search_fieldr   output_fieldstopkdslz-[similarity_search_with_score_by_vector] res:z%scoresr   )page_contentmetadataz;[similarity_search_with_score_by_vector] res[0][score_col]:   )r,   rH   r]   r(   r   r'   r   r&   zipr   popr^   )r4   r   r   r   r   r   r   r   r   	score_colr   countfieldra   rg   metar   scores                     r9   r   z,Hippo.similarity_search_with_score_by_vector.  s   4 88LL<=I AT//0 	}T%7%7$89:x}-.~m_56uQC[!tD6]#hhnn++K'  
 	DSEJK$$y0	mDU3q6%=DE 		%E585NO\UEE5LODO1A1A(BTRCLL%%(VI%6$79 F9%e,EQJEJJU|$		% 
 EOs   0GGc
           
         |i }t         j                  d        | d||||||	d|
}t         j                  d|        t         j                  d|        |j                  ||       |S )a  
        Creates an instance of the VST class from the given texts.

        Args:
            texts (List[str]): List of texts to be added.
            embedding (Embeddings): Embedding model for the texts.
            metadatas (List[dict], optional):
            List of metadata dictionaries for each text.Defaults to None.
            table_name (str): Name of the table. Defaults to "test".
            database_name (str): Name of the database. Defaults to "default".
            connection_args (dict[str, Any]): Connection parameters.
            Defaults to DEFAULT_HIPPO_CONNECTION.
            index_params (dict): Indexing parameters. Defaults to None.
            search_params (dict): Search parameters. Defaults to an empty dictionary.
            drop_old (bool): Whether to drop the old collection. Defaults to False.
            kwargs: Other arguments.

        Returns:
            Hippo: An instance of the VST class.
        z'00 [from_texts] init the class of Hippo)r5   r"   r#   r6   r$   r7   z[from_texts] texts:z[from_texts] metadatas:)r   rR   r   )rH   rI   r]   r   )clsr   r   rR   r"   r#   r6   r$   search_paramsr7   r   	vector_dbs               r9   
from_textszHippo.from_textss  s    F  M=> 
(!'+%
 
	 	*5'23.yk:;%9=rS   )r   r   r   r   NNF)r5   r   r"   rF   r#   rF   r   intr    r   r6   Optional[Dict[str, Any]]r$   Optional[dict]r7   zOptional[bool])r6   dictreturnr   )NN)rQ   zOptional[list]rR   Optional[List[dict]]r   None)N)rQ   rc   rR   r   r   r   )r   r   )r   r   )NNi  )r   zIterable[str]rR   r   r   Optional[int]r   r   r   r   r   	List[str])   NNN)r   rF   r   r   r   r   r   Optional[str]r   r   r   r   r   zList[Document])r   rF   r   r   r   r   r   r   r   r   r   r   r   List[Tuple[Document, float]])r   zList[float]r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   rR   r   r"   rF   r#   rF   r6   zDict[str, Any]r$   zOptional[Dict[Any, Any]]r   r   r7   boolr   r   r   z'Hippo')__name__
__module____qualname____doc__r:   r*   r3   rN   rO   rs   rP   r   r   r   r   classmethodr)   r   r   rS   r9   r   r      s   5t !& !"#48'+#(7&7 7 	7
 7  7 27 %7 !7rD TX(<P	 CG?
?
+??
	?
B	G*rn +/!%VV (V 	V
 V V 
Vv  $"!% ' '  ' 	 '
  '  '  ' 
 'J  $"!%%% % 	%
 % % % 
&%T  $"!%CC C 	C
 C C C 
&CJ 
 +/ &*B152611 1 (	1
 1 1 (1 /1 01 1 1 
1 1rS   r   )
__future__r   r0   typingr   r   r   r   r   r	   r
   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.vectorstoresr   rA   r   r)   	getLoggerr   rH   r   r   rS   r9   <module>r      s[    "  L L L - 0 3< 	  
		8	$M
K M
rS   