
    hY                        d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ  ej0                         Zdd
Z G d de      Z G d de      Z G d de      Zy)    )annotationsN)sha1)Thread)AnyDictIterableListOptionalTuple)Document)
Embeddings)VectorStore)BaseSettingsSettingsConfigDictc                    |D ]  }|| vs y y)z
    Check if a string contains multiple substrings.
    Args:
        s: string to check.
        *args: substrings to check.

    Returns:
        True if all substrings are in the string, False otherwise.
    FT )sargsas      f/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/myscale.pyhas_mul_sub_strr      s"      A:     c                      e Zd ZU dZdZded<   dZded<   dZd	ed
<   dZd	ed<   dZ	ded<   dZ
ded<   dddddZded<   dZded<   dZded<   dZded<   d#dZ eddd d!"      Zy)$MyScaleSettingsa  MyScale client configuration.

    Attribute:
        myscale_host (str) : An URL to connect to MyScale backend.
                             Defaults to 'localhost'.
        myscale_port (int) : URL port to connect with HTTP. Defaults to 8443.
        username (str) : Username to login. Defaults to None.
        password (str) : Password to login. Defaults to None.
        index_type (str): index type string.
        index_param (dict): index build parameter.
        database (str) : Database name to find the table. Defaults to 'default'.
        table (str) : Table name to operate on.
                      Defaults to 'vector_table'.
        metric (str) : Metric to compute distance,
                       supported are ('L2', 'Cosine', 'IP'). Defaults to 'Cosine'.
        column_map (Dict) : Column type map to project column name onto langchain
                            semantics. Must have keys: `text`, `id`, `vector`,
                            must be same size to number of columns. For example:
                            .. code-block:: python

                                {
                                    'id': 'text_id',
                                    'vector': 'text_embedding',
                                    'text': 'text_plain',
                                    'metadata': 'metadata_dictionary_in_json',
                                }

                            Defaults to identity map.

    	localhoststrhosti   intportNOptional[str]usernamepasswordMSTG
index_typezOptional[Dict[str, str]]index_paramidtextvectormetadata)r&   r'   r(   r)   zDict[str, str]
column_mapdefaultdatabase	langchaintableCosinemetricc                    t        | |      S N)getattr)selfitems     r   __getitem__zMyScaleSettings.__getitem__U   s    tT""r   z.envutf-8myscale_ignore)env_fileenv_file_encoding
env_prefixextra)r5   r   returnr   )__name__
__module____qualname____doc__r   __annotations__r   r!   r"   r$   r%   r*   r,   r.   r0   r6   r   model_configr   r   r   r   r   !   s    > D#D#"Hm""Hm"J,0K)0 	"J  HcE3FC# &!	Lr   r   c                  t    e Zd ZdZ	 d	 	 	 	 	 	 	 d fdZedd       ZddZddZddZ		 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZ
e	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zdd	Z	 d	 	 	 	 	 	 	 dd
Z	 d	 	 	 	 	 	 	 	 	 ddZ	 	 d	 	 	 	 	 	 	 	 	 ddZ	 d	 	 	 	 	 	 	 	 	 d dZd!dZ	 	 d"	 	 	 	 	 	 	 d#dZedd       Z xZS )$MyScalea  `MyScale` vector store.

    You need a `clickhouse-connect` python package, and a valid account
    to connect to MyScale.

    MyScale can not only search with simple vector indexes.
    It also supports a complex query with multiple conditions,
    constraints and even sub-queries.

    For more information, please visit
        [myscale official site](https://docs.myscale.com/en/overview/)
    c                   	 ddl m} 	 ddlm} || _        t
        |           ||| _        nt               | _        | j                  sJ | j                  j                  r| j                  j                  sJ | j                  j                  rB| j                  j                  r,| j                  j                  r| j                  j                  sJ dD ]  }|| j                  j                  v rJ  | j                  j                  j                         dv sJ | j                  j                  d	v rt         j#                  d
       t%        |j'                  d            }| j                  j(                  rPddj+                  | j                  j(                  j-                         D cg c]  \  }}d| d| d c}}      z   nd}	d| j                  j                   d| j                  j                   d| j                  j                  d    d| j                  j                  d    d| j                  j                  d    d| j                  j                  d    d| j                  j                  d    d| d| j                  j                  d    d| j                  j.                   d| j                  j                   d|	 d| j                  j                  d    d }
|| _        d!| _        d"| _        || _        | j                  j                  j                         d#v rd$nd%| _         |d+| j                  j                  | j                  j                  | j                  j:                  | j                  j<                  d&|| _        	 | j>                  jA                  d'       | j>                  jA                  d*       | j>                  jA                  |
       y# t        $ r t        d      w xY w# t        $ r d | _        Y w xY wc c}}w # tB        $ r7}t         jE                  d(| j>                  jF                   d)       Y d}~d}~ww xY w),zMyScale Wrapper to LangChain

        embedding (Embeddings):
        config (MyScaleSettings): Configuration to MyScale Client
        Other keyword arguments will pass into
            [clickhouse-connect](https://docs.myscale.com/)
        r   )
get_clientzlCould not import clickhouse connect python package. Please install it with `pip install clickhouse-connect`.)tqdmc                    | S r2   r   )xs    r   <lambda>z"MyScale.__init__.<locals>.<lambda>   s    1 r   N)r&   r(   r'   r)   )IPCOSINEL2)ipcosinel2z_Lower case metric types will be deprecated the future. Please use one of ('IP', 'Cosine', 'L2')ztry this out, ,'= z(
            CREATE TABLE IF NOT EXISTS .z(
                r&   z String,
                r'   r(   z! Array(Float32),
                r)   zP JSON,
                CONSTRAINT cons_vec_len CHECK length(                    z) = z$,
                VECTOR INDEX vidx z                     TYPE z&(                        'metric_type=z,)
            ) ENGINE = MergeTree ORDER BY z	
        \)rY   rU   )rN   rO   ASCDESC)r   r   r!   r"   z"SET allow_experimental_json_type=1zClickhouse version=z6 - There is no allow_experimental_json_type parameter.z$SET allow_experimental_object_type=1r   )$clickhouse_connectrH   ImportErrorrI   pgbarsuper__init__configr   r   r   r*   r,   r.   r0   upperloggerwarninglenembed_queryr%   joinitemsr$   dimBSmust_escape_embeddings
dist_orderr!   r"   clientcommand	Exceptiondebugserver_version)r4   	embeddingra   kwargsrH   rI   kri   vindex_paramsschema__	__class__s               r   r`   zMyScale.__init__n   s   	5	%!DJ 	 DK)+DK{{{{{DKK$4$444KK""$$!!""		
#
 6 	/A.....	/{{!!'')-CCCC;;!77NNG )''78 {{&& 388dkk6M6M6S6S6UVdaq1QCq\VWW 	
((,(<(<'=Qt{{?P?P>Q R''-. /''/0 1''12 3''
34 5[[++H56d3% @##';;#9#9(#C"D E++001 2&&*kk&8&8%9<. I++/;;+A+A$+G*H I	 &$[[''--/3CCE 	
 ! 
!!!![[))[[))	

 
	KK DE 	BCG$_  	K 	  	%$DJ	%: WF  	LL%dkk&@&@%A BF F 	s:   O' O? -PP 'O<?PP	Q&-QQc                    | j                   S r2   )rl   r4   s    r   
embeddingszMyScale.embeddings   s    r   c                8     dj                   fd|D              S )NrW   c              3  ^   K   | ]$  }|j                   v rj                   | n| & y wr2   )rk   rj   ).0cr4   s     r   	<genexpr>z%MyScale.escape_str.<locals>.<genexpr>   s/     V1!t/?/?*?$''1#QFVs   *-)rg   )r4   values   ` r   
escape_strzMyScale.escape_str   s    wwVPUVVVr   c                p   dj                  |      }g }|D ]R  }dj                  |D cg c]   }d| j                  t        |             d" c}      }|j                  d| d       T d| j                  j
                   d| j                  j                   d| ddj                  |       d	}|S c c}w )	NrT   rU   ()z8
                INSERT INTO TABLE 
                    rX   z))
                VALUES
                z
                )rg   r   r   appendra   r,   r.   )r4   transaccolumn_namesks_datan_ni_strs           r   _build_istrzMyScale._build_istr   s    XXl# 	#AAFbAdooc"g67q9FGALL1QCq"	#[[))*!DKK,=,=+>at D%! "	  Gs   %B3
c                ^    | j                  ||      }| j                  j                  |       y r2   )r   rn   ro   )r4   r   r   _i_strs       r   _insertzMyScale._insert   s&    !!'<8F#r   c           	        |xs6 |D cg c]*  }t        |j                  d            j                         , c}}| j                  j                  }g }|d   ||d   ||d   t        | j                  j                  |      i}	|xs |D 
cg c]  }
i  c}
}t        t        j                  |      |	|d   <   t        t        |      t        |	      z
        dk\  sJ t        |	j                          \  }}	 d}| j                  t        | dt        |      	      D ]  }t        ||j                  | j                  j                  d                  | j                   k(  sJ |j#                  |       t        |      |k(  sf|r|j%                          t'        | j(                  ||g
      }|j+                          g } t        |      dkD  r$|r|j%                          | j)                  ||       |D cg c]  }| c}S c c}w c c}
w c c}w # t,        $ r:}t.        j1                  dt3        |       dt5        |       d       g cY d}~S d}~ww xY w)a  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            ids: Optional list of ids to associate with the texts.
            batch_size: Batch size of insertion
            metadata: Optional column data to be inserted

        Returns:
            List of ids from adding the texts into the vectorstore.

        r7   r&   r'   r(   r)   r   NzInserting data...)desctotal)targetr   	[91m[1m
[0m [95m[0m)r   encode	hexdigestra   r*   maprl   rf   jsondumpsre   setziprh   r^   indexri   r   rg   r   r   startrp   rc   errortyper   )r4   texts	metadatas
batch_sizeidsrt   tcolmap_r   r   ry   keysvaluesrv   ies                   r   	add_textszMyScale.add_texts   s   * I5Iad188G,-779I++((DM3FOUHs4#3#3#?#?G

 4e!4"!4	,/

I,FWZ()3w<#l"334999L..01f	AZZV#6c)n    
! 1TZZ(>(>x(HIJKtxxWWWq!w<:-dll'4IAGGI G
! 7|aFFHWd+"#!A##= J "5* $ 	LL?47)3CCF87STI	sC   /H
	H5B
H  A5H 5	H>H H 	I/IIIc                D     | ||fi |}|j                  ||||       |S )aZ  Create Myscale wrapper with existing texts

        Args:
            texts (Iterable[str]): List or tuple of strings to be added
            embedding (Embeddings): Function to extract text embedding
            config (MyScaleSettings, Optional): Myscale configuration
            text_ids (Optional[Iterable], optional): IDs for the texts.
                                                     Defaults to None.
            batch_size (int, optional): Batchsize when transmitting data to MyScale.
                                        Defaults to 32.
            metadata (List[dict], optional): metadata to texts. Defaults to None.
            Other keyword arguments will pass into
                [clickhouse-connect](https://clickhouse.com/docs/en/integrations/python#clickhouse-connect-driver-api)
        Returns:
            MyScale Index
        )r   r   r   )r   )	clsr   rs   r   ra   text_idsr   rt   ctxs	            r   
from_textszMyScale.from_texts  s.    6 )V.v.ejIV
r   c                   d| j                   j                   d| j                   j                   d}|| j                   j                   d| j                   j                   dz  }|d| j                   j
                   dz  }|dz  }| j                  j                  d	| j                   j                   d| j                   j                         j                         D ]  }|d
|d   dd|d   ddz  } |dz  }|S )zText representation for myscale, prints backends, username and schemas.
            Easy to use with `str(Myscale())`

        Returns:
            repr: string to show connection info and data schema
        z	[92m[1mrX   z @ :z[0m

z[1musername: z[0m

Table Schema:
z4---------------------------------------------------
zDESC z|[94mname24sz
[0m|[96mr   z[0m|
)	ra   r,   r.   r   r   r!   rn   querynamed_results)r4   _reprrs      r   __repr__zMyScale.__repr__>  s    "$++"6"6!7q9J9J8K3ODKK$$%Qt{{'7'7&8DD$T[[%9%9$::TUU ""DKK(()4;;+<+<*=>

-/	A AfIc?*:1V9S/TE	 	 r   c                   dj                  t        t        |            }|rd| }nd}d| j                  j                  d    d| j                  j                  d    d| j                  j
                   d	| j                  j                   d
| d| j                  j                  d    d| d| j                   d| d
}|S )NrT   	PREWHERE rW   
            SELECT r'   z, 
                r)   z, dist
            FROM rX   
            
            ORDER BY distance(r(   , []) 
                AS dist 
            LIMIT )rg   r   r   ra   r*   r,   r.   rm   r4   q_embtopk	where_str	q_emb_strq_strs         r   _build_qstrzMyScale._build_qstrR  s     HHSe_-	#I;/IIKK**623 4''
34 5++&&'q):):(; <K #{{55h?@I; O) *&  r   c                ^     | j                   | j                  j                  |      ||fi |S )a  Perform a similarity search with MyScale

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of Documents
        )similarity_search_by_vectorrl   rf   )r4   r   ru   r   rt   s        r   similarity_searchzMyScale.similarity_searchf  s9    & 0t//((/I
AG
 	
r   c           	        | j                  |||      }	 | j                  j                  |      j                         D cg c]C  }t	        || j
                  j                  d      || j
                  j                  d            E c}S c c}w # t        $ r:}t        j                  dt        |       dt        |       d       g cY d}~S d}~ww xY w)  Perform a similarity search with MyScale by vectors

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of (Document, similarity)
        r'   r)   page_contentr)   r   r   r   N)r   rn   r   r   r   ra   r*   rp   rc   r   r   r   r4   rs   ru   r   rt   r   r   r   s           r   r   z#MyScale.similarity_search_by_vector}  s    .   Ay9
	 **51??A
 	 !"4;;#9#9&#A!Bt{{55jAB    	LL?47)3CCF87STI	s0   +B  ABB B 	C/CCCc           	        | j                  | j                  j                  |      ||      }	 | j                  j	                  |      j                         D cg c]H  }t        || j                  j                  d      || j                  j                  d            |d   fJ c}S c c}w # t        $ r:}t        j                  dt        |       dt        |       d       g cY d}~S d}~ww xY w)	/  Perform a similarity search with MyScale

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of documents most similar to the query text
            and cosine distance in float for each.
            Lower score represents more similarity.
        r'   r)   r   distr   r   r   N)r   rl   rf   rn   r   r   r   ra   r*   rp   rc   r   r   r   r4   r   ru   r   rt   r   r   r   s           r   'similarity_search_with_relevance_scoresz/MyScale.similarity_search_with_relevance_scores  s    *   !1!1!=!=e!DaS	 **51??A	  %&t{{'='=f'E%F!"4;;#9#9*#E!F fI	 	 	  	LL?47)3CCF87STI	s0   +B. AB)&B. )B. .	C17/C,&C1,C1c                    | j                   j                  d| j                  j                   d| j                  j                          y)z,
        Helper function: Drop data
        zDROP TABLE IF EXISTS rX   N)rn   ro   ra   r,   r.   r|   s    r   dropzMyScale.drop  s<     	#DKK$8$8#94;;;L;L:MN	
r   c                X   |	|J d       g }|r_t        |      dkD  rQdj                  |D cg c]  }d| d
 c}      }|j                  | j                  j                  d    d| d       |r|j                  |       t        |      dkD  sJ d	j                  |      }d
| j                  j
                   d| j                  j                   d| }	 | j                  j                  |       yc c}w # t        $ r(}	t        j                  t        |	             Y d}	~	yd}	~	ww xY w)a3  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments that subclasses might use.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        NzIYou need to specify where to be deleted! Either with `ids` or `where_str`r   rS   rU   r&   z IN (r   z AND zDELETE FROM rX   z WHERE TF)re   rg   r   ra   r*   r,   r.   rn   ro   rp   rc   r   r   )
r4   r   r   rt   condsr&   id_listwhere_str_finalqstrr   s
             r   deletezMyScale.delete  s&     KI$5 	
W	
6 3s8a<iiS 9r1RD 9:GLLDKK22489wiqIJLL#5zA~~!,,u-4;;//0$++2C2C1D E$%' 		KK% !:  	LLQ 	s   C3C8 8	D)D$$D)c                4    | j                   j                  d   S )Nr)   )ra   r*   r|   s    r   metadata_columnzMyScale.metadata_column  s    {{%%j11r   r2   )rs   r   ra   Optional[MyScaleSettings]rt   r   r>   None)r>   r   )r   r   r>   r   )r   r   r   Iterable[str]r>   r   )r   r   r   r   r>   r   )N    N)r   r   r   zOptional[List[dict]]r   r   r   Optional[Iterable[str]]rt   r   r>   	List[str])NNNr   )r   r   rs   r   r   zOptional[List[Dict[Any, Any]]]ra   r   r   r   r   r   rt   r   r>   rF   r>   r   r   List[float]r   r   r   r    r>   r      N)
r   r   ru   r   r   r    rt   r   r>   List[Document]
rs   r   ru   r   r   r    rt   r   r>   r   
r   r   ru   r   r   r    rt   r   r>   zList[Tuple[Document, float]])r>   r   )NN)r   zOptional[List[str]]r   r    rt   r   r>   zOptional[bool])r?   r@   rA   rB   r`   propertyr}   r   r   r   r   classmethodr   r   r   r   r   r   r   r   r   __classcell__rz   s   @r   rF   rF   `   sB     -1^%^% *^% 	^%
 
^%@    W$ +/'+66 (6 	6
 %6 6 
6p 
 59,0,0  2	
 * *   
 <* IM (+8E	* BF

 
1>
QT
	
4 #'	"" " !	"
 " 
"J BF## #1>#QT#	%#J
 $(#'$ $ !$ 	$
 
$L 2 2r   rF   c                       e Zd ZdZdg f	 	 	 	 	 	 	 	 	 d fdZ	 d		 	 	 	 	 	 	 d
dZ	 	 d	 	 	 	 	 	 	 	 	 ddZ	 d	 	 	 	 	 	 	 	 	 ddZedd       Z	 xZ
S )MyScaleWithoutJSONzsMyScale vector store without metadata column

    This is super handy if you are working to a SQL-native table
    Nc                6    t        |   ||fi | || _        y)ag  Building a myscale vector store without metadata column

        embedding (Embeddings): embedding model
        config (MyScaleSettings): Configuration to MyScale Client
        must_have_cols (List[str]): column names to be included in query
        Other keyword arguments will pass into
            [clickhouse-connect](https://docs.myscale.com/)
        N)r_   r`   must_have_cols)r4   rs   ra   r   rt   rz   s        r   r`   zMyScaleWithoutJSON.__init__  s!     	F5f5)7r   c                   dj                  t        t        |            }|rd| }nd}d| j                  j                  d    ddj                  | j
                         d| j                  j                   d| j                  j                   d	| d
| j                  j                  d    d| d| j                   d| d	}|S )NrT   r   rW   r   r'   z, dist, 
                z
            FROM rX   r   r   r(   r   r   r   )	rg   r   r   ra   r*   r   r,   r.   rm   r   s         r   r   zMyScaleWithoutJSON._build_qstr  s     HHSe_-	#I;/IIKK**623 4$--./ 0++&&'q):):(; <K #{{55h?@I; O) *&  r   c                   | j                  |||      }	 | j                  j                  |      j                         D cg c]E  }t	        || j
                  j                  d      | j                  D ci c]  }|||   
 c}      G c}}S c c}w c c}}w # t        $ r:}t        j                  dt        |       dt        |       d       g cY d}~S d}~ww xY w)r   r'   r   r   r   r   N)r   rn   r   r   r   ra   r*   r   rp   rc   r   r   r   r   s           r   r   z.MyScaleWithoutJSON.similarity_search_by_vector%  s    .   Ay9
	 **51??A
 	 !"4;;#9#9&#A!B/3/B/BC!a1gC  D  	LL?47)3CCF87STI	sA   ,B 3B4B
BB BB 	C#/CCCc                   | j                  | j                  j                  |      ||      }	 | j                  j	                  |      j                         D cg c]J  }t        || j                  j                  d      | j                  D ci c]  }|||   
 c}      |d   fL c}}S c c}w c c}}w # t        $ r:}t        j                  dt        |       dt        |       d       g cY d}~S d}~ww xY w)r   r'   r   r   r   r   r   N)r   rl   rf   rn   r   r   r   ra   r*   r   rp   rc   r   r   r   r   s           r   r   z:MyScaleWithoutJSON.similarity_search_with_relevance_scoresI  s    *   !1!1!=!=e!DaS	 **51??A	  %&t{{'='=f'E%F373F3F!Ga!QqT'!G fI	 	 "H		  	LL?47)3CCF87STI	sA   ,B8 3B2B-B2)B8 -B22B8 8	C;/C60C;6C;c                     y)NrW   r   r|   s    r   r   z"MyScaleWithoutJSON.metadata_columnn  s    r   )
rs   r   ra   r   r   r   rt   r   r>   r   r2   r   r   r   r   r   )r?   r@   rA   rB   r`   r   r   r   r   r   r   r   s   @r   r   r     s     -1$&	88 *8 "	8
 8 
8& IM (+8E	. #'	"" " !	"
 " 
"J BF## #1>#QT#	%#J  r   r   )r   r   r   r   r>   bool)
__future__r   r   logginghashlibr   	threadingr   typingr   r   r   r	   r
   r   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.vectorstoresr   pydantic_settingsr   r   	getLoggerrc   r   r   rF   r   r   r   r   <module>r
     se    "     = = - 0 3 >				 <l <~V2k V2rw wr   