
    h                        d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ erd d	lmZ d d
lmZ   G d de      Z!y)    )annotationsN)contextmanager)StringIO)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTupleType)
Embeddings)VectorStore)Document)
connection)cursorc                  l   e Zd ZdZ G d deej                        Z G d d      Zdddd	 	 	 	 	 	 	 	 	 	 	 	 	 d$d	Z	 G d
 d      Z
d%dZd%dZ	 	 d&	 	 	 	 	 	 	 d'dZ	 d(	 	 	 	 	 	 	 d)dZd*dZ	 d(	 	 	 	 	 	 	 d+dZ	 	 	 	 	 	 	 	 d,dZe	 	 	 	 	 d-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d.d       Z	 	 d&	 	 	 	 	 	 	 d/dZ	 d0	 	 	 	 	 	 	 d1dZd2dZ	 d3	 	 	 	 	 	 	 d4dZ	 d3	 	 	 	 	 	 	 d5dZ	 d3	 	 	 	 	 	 	 d6dZ	 d3	 	 	 	 	 	 	 d7dZ	 d(	 	 	 	 	 d8dZ	 	 	 	 	 	 	 	 d9dZd:dZd%dZd%dZd;d Z d;d!Z!	 	 	 	 	 	 d<d"Z"d=d#Z#y)>YellowbrickzYellowbrick as a vector database.
    Example:
        .. code-block:: python
            from langchain_community.vectorstores import Yellowbrick
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            ...
    c                      e Zd ZdZdZdZy)Yellowbrick.IndexTypez<Enumerator for the supported Index types within Yellowbrick.nonelshN)__name__
__module____qualname____doc__NONELSH     j/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/yellowbrick.py	IndexTyper   *   s    Jr!   r#   c                  .    e Zd ZdZ	 	 d	 	 	 ddZdddZy)	Yellowbrick.IndexParamsz/Parameters for configuring a Yellowbrick index.Nc                `    |t         j                  j                  }|| _        |xs i | _        y N)r   r#   r   
index_typeparams)selfr(   r)   s      r"   __init__z Yellowbrick.IndexParams.__init__3   s-    
 !(2277
(DO ,BDKr!   c                :    | j                   j                  ||      S r'   )r)   get)r*   keydefaults      r"   	get_paramz!Yellowbrick.IndexParams.get_param=   s    ;;??300r!   NN)r(   z!Optional['Yellowbrick.IndexType']r)   zOptional[Dict[str, Any]]r'   )r.   strr/   r   returnr   )r   r   r   r   r+   r0   r    r!   r"   IndexParamsr%   0   s*    = =A/3	'9	' -	'	1r!   r4   NF)schemaloggerdropc                  ddl m} |j                          |r|| _        nt	        j
                  t              | _        | j                  j                  t        j                         t	        j                         }|j                  t        j                         t	        j                  d      }	|j                  |	       | j                  j                  |       t        |t              s| j                  j!                  d       yd| _        d| _        d| _        || _        t*        j-                  || j                        | _        t1        j2                  | j.                  j4                         || _        || _        || _        d| _        | j?                          | j.                  jA                         5 }
|rn| jC                  | j8                  | j6                  |
	       | jC                  | j8                  | j&                  z   | j6                  |
	       | jE                  |
       | jG                  |
       | jI                  |
       ddd       y# 1 sw Y   yxY w)
zInitialize with yellowbrick client.
        Args:
            embedding: Embedding operator
            connection_string: Format 'postgres://username:password@host:port/database'
            table: Table used to store / retrieve embeddings from
        r   )extrasz)%(asctime)s - %(levelname)s - %(message)sz+embeddings input must be Embeddings object.N
_lsh_index_lsh_hyperplane_content)tabler5   r   )%psycopg2r9   register_uuidr6   logging	getLoggerr   setLevelERRORStreamHandlerDEBUG	FormattersetFormatter
addHandler
isinstancer   errorLSH_INDEX_TABLELSH_HYPERPLANE_TABLECONTENT_TABLEconnection_stringr   DatabaseConnectionr   atexitregisterclose_connection_schema_table
_embedding_max_embedding_len_check_database_utf8
get_cursorr7   _drop_lsh_index_tables_create_schema_create_table)r*   	embeddingrN   r=   r5   r6   r7   r9   handler	formatterr   s              r"   r+   zYellowbrick.__init__@   s     	$ DK!++H5DKKK  /++-GW]]+))*UVI  +KK""7+)Z0KKKL$0):!",!2%889JDKKX889#"&!!#__'') 	'V		DLL	P		++(:(::<<!  
 ++F3'v&	' 	' 	's   8BIIc                       e Zd ZU dZded<   dZded<   ded<   	 	 	 	 	 	 d fdZdd	Zdd
Ze	dd       Z
e	dd       Z xZS )Yellowbrick.DatabaseConnectionNr2   _connection_stringzOptional['PgConnection']_connectionlogging.Logger_loggerc                    | j                   6t        | 	  |       | _         || j                   _        || j                   _        | j                   S r'   )	_instancesuper__new__ra   rd   )clsrN   r6   	__class__s      r"   rh   z&Yellowbrick.DatabaseConnection.__new__   sA     }}$ % 43D0(.%== r!   c                    | j                   r9| j                   j                  s"| j                   j                          d | _         y y y r'   )rb   closedclose)r*   s    r"   rR   z/Yellowbrick.DatabaseConnection.close_connection   s<    (8(8(?(?  &&(#'  )@r!   c                    dd l }| j                  r| j                  j                  r1|j                  | j                        | _        d| j                  _        | j                  S )Nr   F)r>   rb   rl   connectra   
autocommit)r*   r>   s     r"   get_connectionz-Yellowbrick.DatabaseConnection.get_connection   sO    ##t'7'7'>'>#+#3#3D4K4K#L .3  +###r!   c              #     K   ddl m} | j                         }	 | |j                          y # |$ r>}|j	                          | j
                  j                  dd       t        d      |d }~ww xY ww)Nr   )DatabaseErrorz2Database error occurred, rolling back transaction.T)exc_infozDatabase transaction failed.)r>   rs   rq   commitrollbackrd   rJ   RuntimeError)r*   rs   connes       r"   get_managed_connectionz5Yellowbrick.DatabaseConnection.get_managed_connection   su     .&&(D	
  ! J""HSW #  ##ABIJs$   A5/ A5A29A--A22A5c              #     K   | j                         5 }|j                         }	 | |j                          	 d d d        y # |j                          w xY w# 1 sw Y   y xY wwr'   )rz   r   rm   )r*   rx   r   s      r"   rX   z)Yellowbrick.DatabaseConnection.get_cursor   sX     ,,. #$# LLLN# #
 LLN# #s1   A$AAA	A$AAA!A$)rN   r2   r6   rc   r3   z 'Yellowbrick.DatabaseConnection'r3   None)r3   z'PgConnection')r3   z%Generator['PgConnection', None, None])r3   z!Generator['PgCursor', None, None])r   r   r   rf   __annotations__rb   rh   rR   rq   r   rz   rX   __classcell__)rj   s   @r"   rO   r`   ~   sn    	04-4	!$'	!1?	!-	!	(
	$ 
	 
	 
	# 
	#r!   rO   c                    ddl m} | j                  rJ|j                  |j	                  d      j                  |j                  | j                                     yy)z>
        Helper function: create schema if not exists
        r   sqlzE
                    CREATE SCHEMA IF NOT EXISTS {s}
                )sN)r>   r   rS   executeSQLformat
Identifier)r*   r   r   s      r"   rZ   zYellowbrick._create_schema   sQ     	!<<NN &nnT\\2   r!   c                .   ddl m} | j                  r| j                  fnd} |j                  g || j                  | j
                  z    }|j                  | j                  | j
                  z   dz         }|j                  |j                  d      j                  ||             | j                  r| j                  fnd} |j                  g || j                   } |j                  g || j                  | j
                  z    }|j                  | j                  | j
                  z   dz         }|j                  | j                  | j
                  z   dz         }	|j                  |j                  d	      j                  ||||	
             y)z=
        Helper function: create table if not exists
        r   r   r    
_pk_doc_ida0  
                CREATE TABLE IF NOT EXISTS {t} (
                doc_id UUID NOT NULL,
                text VARCHAR(60000) NOT NULL,
                metadata VARCHAR(1024) NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            tc_pk_doc_id_embedding_id
_fk_doc_ida  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                embedding_id SMALLINT NOT NULL,
                embedding FLOAT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, embedding_id),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            t1t2c1c2N)	r>   r   rS   r   rT   rM   r   r   r   )
r*   r   r   schema_prefixr   r   r   r   r   r   s
             r"   r[   zYellowbrick._create_table   ss    	!+/<<RCNNLML4;;9K9K+KLNN4;;););;lJKGG	 f  	
  ,0<<RS^^8]8DKK8S^^M]MDKK$:L:L,LM^^KK$,,,/HH
 ^^DKK$*<*<<|KLGG
 f	  	
r!   c                    |8| j                   j                         5 }| j                  |||       ddd       y| j                  |||       y# 1 sw Y   yxY w)z
        Helper function: Drop data. If a cursor is provided, use it;
        otherwise, obtain a new cursor for the operation.
        N)r5   )r   rX   _drop_table)r*   r=   r5   r   s       r"   r7   zYellowbrick.drop   sa     >++- ?  v >? ? VU6:? ?s   AAc                    ddl m} |r|j                  ||      }n|j                  |      }|j                  d      j	                  |      }|j                  |       y)zI
        Executes the drop table command using the given cursor.
        r   r   z1
        DROP TABLE IF EXISTS {} CASCADE
        N)r>   r   r   r   r   r   )r*   r   r=   r5   r   
table_namedrop_table_querys          r"   r   zYellowbrick._drop_table  sY     	!6J.J77
 &
	 	
 	'(r!   c                   | j                   j                         5 }d}|j                  |       |j                         d   }ddd       j	                         dk(  s|j	                         dk(  ryt        d      # 1 sw Y   ;xY w)zE
        Helper function: Test the database is UTF-8 encoded
        z
                SELECT pg_encoding_to_char(encoding)
                FROM pg_database
                WHERE datname = current_database();
            r   Nutf8zutf-8TzDatabase encoding is not UTF-8)r   rX   r   fetchonelower	Exception)r*   r   queryencodings       r"   rW   z Yellowbrick._check_database_utf8  s     __'') 	,VE
 NN5!(+H	, >>v%)9W)D<==	, 	,s   'A<<Bc           
        d}t        |      }| j                  j                  t        |            }g }|s|D cg c]  }i  }}|j                  d      xs t        j                         }| j                  j                         5 }	t               }
t               }t        j                  |
ddt        j                        }t        j                  |ddt        j                        }d}t        |      D ]  \  }}t        t        j                               }|j!                  |       |j#                  ||t%        j&                  ||         g       t        ||         D ]  \  }}|j#                  |||g        |dz  }||k\  s| j)                  |	|
|       |
j+                  d       |
j-                  d       |j+                  d       |j-                  d       d} |dkD  r| j)                  |	|
|       d d d        |j.                  t        j0                  j2                  k(  r%| j5                  |t        j6                               |S c c}w # 1 sw Y   \xY w)Ni'  index_params	")	delimiter	quotecharquotingr      )listrU   embed_documentsr-   r   r4   r   rX   r   csvwriterQUOTE_MINIMAL	enumerater2   uuiduuid4appendwriterowjsondumps_copy_to_dbseektruncater(   r#   r   _update_indexUUID)r*   texts	metadataskwargs
batch_size
embeddingsresults_r   r   
content_ioembeddings_iocontent_writerembeddings_writercurrent_batch_sizeitextdoc_uuidembedding_idr\   s                       r"   	add_textszYellowbrick.add_texts3  s*    
U__44T%[A
%*++I+zz.1N[5L5L5N__'')  	DV!J$JM ZZdc3CTCTN !$

cFWFW! "#$U+ +4tzz|,x(''4IaL9Q(RS/8A/G T+L)%..,	/RST #a'"%3$$VZGOOA&''*!&&q)!**1-)*&%+( "A%  ]CA 	DD ""k&;&;&?&??|TYYx-@AS , 	D  	Ds   	IDI	A3II c                   |j                  d       |j                  d       ddlm} | j                  r| j                  fnd} |j                  g || j
                  | j                  z    }|j                  d      j                  |      }|j                  ||       | j                  r| j                  fnd} |j                  g || j
                   }|j                  d      j                  |      }|j                  ||       y )Nr   r   r    z
            COPY {table} (doc_id, text, metadata) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )r=   z
            COPY {table} (doc_id, embedding_id, embedding) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )
r   r>   r   rS   r   rT   rM   r   r   copy_expert)	r*   r   r   r   r   r   r=   content_copy_queryembeddings_copy_querys	            r"   r   zYellowbrick._copy_to_dbj  s     	1 +/<<RPPt{{T=O=O/OP WW

 &u&
 	 	-z:+/<<R;;t{{; #!

 &u&
 	 	0-@r!   c                J     | |||||      }	 |	j                   d||d| |	S )a  Add texts to the vectorstore index.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            connection_string: URI to Yellowbrick instance
            embedding: Embedding function
            table: table to store embeddings
            kwargs: vectorstore specific parameters
        )r\   rN   r=   r5   r7   )r   r   r    )r   )
ri   r   r\   r   rN   r=   r5   r7   r   vsss
             r"   
from_textszYellowbrick.from_texts  s<    * /
 	AEYA&A
r!   c                   ddl m |rj                  d      }nc|Vt        fd|D              }j                  d      j	                  |      }j                  d      j                  |      }nt        d	      | j                  r| j                  fnd
}| j                  j                         5 } j                  g || j                  | j                  z    }	j                  d      j                  |	|      }
|j                  |
        j                  g || j                   }	j                  d      j                  |	|      }
|j                  |
        | j                  || j                  | j                  z   g| r^ j                  g || j                  | j                  z    }	j                  d      j                  |	|      }
|j                  |
       ddd       y# 1 sw Y   yxY w)zxDelete vectors by uuids.

        Args:
            ids: List of ids to delete, where each id is a uuid string.
        r   r   z'
                WHERE 1=1
            Nc              3  @   K   | ]  }j                  |        y wr'   )Literal).0idr   s     r"   	<genexpr>z%Yellowbrick.delete.<locals>.<genexpr>  s     8b#++b/8s   z, z5
                WHERE doc_id IN ({ids})
            )idsz*Either ids or delete_all must be provided.r    zDELETE FROM {table} {where_sql})r=   	where_sql)r>   r   r   tuplejoinr   
ValueErrorrS   r   rX   r   rT   rM   r   _table_existsrK   )r*   r   
delete_allr   r   uuidsids_formattedr   r   table_identifierr   r   s              @r"   deletezYellowbrick.delete  s    	!I
 _8C88EGGDM..u5M f!  	  IJJ+/<<R__'') 	&V-s~~    $d.@.@ @  GG=>EE&) F E NN5!-s~~J}JdkkJGG=>EE&) F E NN5!!t!!d&:&::=J $23>> $"$$(KK$2F2F$F$   ABII*i J  u%1	&4 5	&4 s   4D9G66G?c                    ddl m} |j                  |      }|j                  |      }|j                  |j	                  d      j                  ||             |j                         d   dkD  S )z>
        Checks if a table exists in the given schema
        r   r   z
                SELECT COUNT(*)
                FROM sys.table t INNER JOIN sys.schema s ON t.schema_id = s.schema_id
                WHERE s.name = {schema} AND t.name = {table_name}
            )r5   r   )r>   r   r   r   r   r   r   )r*   r   r   r5   r   s        r"   r   zYellowbrick._table_exists  sp     	!V$[[,
GG f%  	
  #a''r!   c                    dd l }dj                  t        t        |            }|j	                  |j                               }|j                         }t        j                  |d d       }|S )Nr   ,   )bytes)	hashlibr   mapr2   sha1encodedigestr   r   )r*   vectorr   
vector_strhash_objecthash_digestvector_uuids          r"   _generate_vector_uuidz!Yellowbrick._generate_vector_uuid  sZ    XXc#v./
ll:#4#4#67!((*iik#2&67r!   c                x   ddl m} ddlm} |j	                  d      xs t
        j                         }| j                  j                         5 }d| j                  z   }| j                  |      }	|j                  d      j                  |j                  |            }
|j                  |
       t        |      D cg c]  \  }}t!        |	      ||f }}}|j                  d      j                  |j                  |            } ||||       |j                  |      }| j"                  r| j"                  fnd} |j                  g || j                   } |j                  g || j                  | j$                  z    }|j&                  t
        j(                  j*                  k(  r| j                  d	z   }| j-                  |||       | j"                  r| j"                  fnd} |j                  g || j                  | j.                  z    }|j                  |      }|j                  d
      j                  ||||||j1                  |j3                  dd                  }|j                  ||f       |j5                         }nF|j                  d      j                  |||      }|j                  ||f       |j5                         }ddd       g }D ]D  }t7        j8                  |d         xs i }t;        |d   |      }|j=                  ||d   f       F |S c c}}w # 1 sw Y   \xY w)a  Perform a similarity search with Yellowbrick with vector

        Args:
            embedding (List[float]): query embedding
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document, float]: List of Documents and scores
        r   r   )execute_valuesr   tmp_z 
                CREATE TEMPORARY TABLE {} (
                doc_id UUID,
                embedding_id SMALLINT,
                embedding FLOAT)
                ON COMMIT DROP
                DISTRIBUTE REPLICATE
            z:INSERT INTO {} (doc_id, embedding_id, embedding) VALUES %sr    	_tmp_hasha/  
                    WITH index_docs AS (
                    SELECT
                        t1.doc_id,
                        SUM(ABS(t1.hash-t2.hash)) as hamming_distance
                    FROM
                        {lsh_index} t1
                    INNER JOIN
                        {input_hash_table} t2
                    ON t1.hash_index = t2.hash_index
                    GROUP BY t1.doc_id
                    HAVING hamming_distance <= {hamming_distance}
                    )
                    SELECT
                        text,
                        metadata,
                       SUM(v1.embedding * v2.embedding) /
                        (SQRT(SUM(v1.embedding * v1.embedding)) *
                       SQRT(SUM(v2.embedding * v2.embedding))) AS score
                    FROM
                        {v1} v1
                    INNER JOIN
                        {v2} v2
                    ON v1.embedding_id = v2.embedding_id
                    INNER JOIN
                        {v3} v3
                    ON v2.doc_id = v3.doc_id
                    INNER JOIN
                        index_docs v4
                    ON v2.doc_id = v4.doc_id
                    GROUP BY v3.doc_id, v3.text, v3.metadata
                    ORDER BY score DESC
                    LIMIT %s
                hamming_distance)v1v2v3	lsh_indexinput_hash_tabler   a  
                    SELECT 
                        text,
                        metadata,
                        score
                    FROM
                        (SELECT
                            v2.doc_id doc_id,
                            SUM(v1.embedding * v2.embedding) /
                            (SQRT(SUM(v1.embedding * v1.embedding)) *
                            SQRT(SUM(v2.embedding * v2.embedding))) AS score
                        FROM
                            {v1} v1
                        INNER JOIN
                            {v2} v2
                        ON v1.embedding_id = v2.embedding_id
                        GROUP BY v2.doc_id
                        ORDER BY score DESC LIMIT %s
                        ) v4
                    INNER JOIN
                        {v3} v3
                    ON v4.doc_id = v3.doc_id
                    ORDER BY score DESC
                )r   r   r   Nr   )page_contentmetadata   )r>   r   psycopg2.extrasr   r-   r   r4   r   rX   rT   r   r   r   r   r   r   r2   rS   rM   r(   r#   r   _generate_tmp_lsh_hashesrK   r   r0   fetchallr   loadsr   r   )r*   r\   kr   r   r   r   r   tmp_embeddings_table
tmp_doc_idcreate_table_queryr   embedding_value
data_inputinsert_queryr   r   r   r   tmp_hash_tabler   r   	sql_queryr   	documentsresultr   docs                               r"   &similarity_search_with_score_by_vectorz2Yellowbrick.similarity_search_with_score_by_vector  s;    	!2zz.1N[5L5L5N__'') z	,V#)DKK#7 33I>J!$	" fS^^$89:  NN-. 6?y5I1L/ Z,@J  77LfS^^$89:  6<< 45B/3||T\\OM<<<BQQd>P>P0PQB&&+*?*?*C*CC!%{!:--(" 48<<R*CNN "$(KK$2F2F$F	 $'>>.#A GG!#F &'%5%([[$../A1E&  	G Z D !//+GG2 &  3 < y1$/ //+uz	,x 35	 	/Fzz&),2Hq	HECc6!9-.	/
 iz	, z	,s    A/L0=L*H L0*L00L9c                    | j                   j                  |      } | j                  d||d|}|D cg c]  \  }}|	 c}}S c c}}w )ae  Perform a similarity search with Yellowbrick

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of Documents
        r\   r  r    rU   embed_queryr  )r*   r   r  r   r\   r  r  r   s           r"   similarity_searchzYellowbrick.similarity_search  sV     OO//6	?D?? 
1
(.
	 #,,Q,,,s   Ac                f    | j                   j                  |      } | j                  d||d|}|S )ar  Perform a similarity search with Yellowbrick

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of (Document, similarity)
        r  r    r  )r*   r   r  r   r\   r  s         r"   similarity_search_with_scorez(Yellowbrick.similarity_search_with_score  sD     OO//6	?D?? 
1
(.
	 r!   c                b     | j                   d||d|}|D cg c]  \  }}|	 c}}S c c}}w )a  Perform a similarity search with Yellowbrick by vectors

        Args:
            embedding (List[float]): query embedding
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of documents
        r  r    )r  )r*   r\   r  r   r  r  r   s          r"   similarity_search_by_vectorz'Yellowbrick.similarity_search_by_vector  sC     @D?? 
1
(.
	 #,,Q,,,s   +c                   ddl m} | j                  r| j                  fnd} |j                  g || j                  | j
                  z    } |j                  g || j                  | j                  z    } |j                  g || j                   }|j                  d      j                  |      }|r9|j                  d      j                  |j                  t        |                  n|j                  d      }	|j                  d      }
|j                  d	      j                  ||||	|

      }|j                  |       y)zAdd hashes to LSH indexr   r   r    zINSERT INTO {}zWHERE e.doc_id = {doc_id})doc_id zGROUP BY 1, 2av  
            {query_prefix}
            SELECT
                e.doc_id as doc_id,
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {condition}
            {group_by}
        )query_prefixembedding_tablehyperplanes	conditiongroup_byN)r>   r   rS   r   rT   rL   rK   r   r   r   r2   r   )r*   r   r  r   r   lsh_hyperplane_tablelsh_index_table_idembedding_table_idquery_prefix_idr   r!  input_querys               r"   _update_lsh_hasheszYellowbrick._update_lsh_hashes  sI    	!+/<<R-s~~  
 
 KK$*C*CC 
 ,S^^ 

 KK$*>*>>
 ,S^^H]HDKKH''"23::;MN  GG/077s{{3v;?W7X 	
 77?+gg

 &(.,  
 	& 	{#r!   c                   ddl m} | j                  r| j                  fnd} |j                  g || j                  | j
                  z    }|j                  |      }|j                  |      }|j                  d      j                  |      }	|j                  d      }
|j                  d      j                  |	|||
      }|j                  |       y)	zGenerate temp LSHr   r   r    z+CREATE TEMPORARY TABLE {} ON COMMIT DROP ASz
GROUP BY 1a[  
            {query_prefix}
            SELECT
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {group_by}
            DISTRIBUTE REPLICATE
        )r  r  r  r!  N)	r>   r   rS   r   rT   rL   r   r   r   )r*   r   tmp_embedding_tabler  r   r   r"  tmp_embedding_table_idtmp_hash_table_idr  r!  r&  s               r"   r  z$Yellowbrick._generate_tmp_lsh_hashes  s     	!+/<<R-s~~  
 
 KK$*C*CC 
 "%0C!DNN>:wwLMTT
 77<(gg	
 &%2,	  
 	" 	{#r!   c                   ddl m} | j                  r| j                  fnd} |j                  g || j                  | j
                  z    }|j                  |j                  d      j                  |             |j                         d   dkD  ry |j                  g || j                   }|j                  |j                  d      j                  |             |j                         d   }|dz  }|j                  d	      j                  |j                  |      |j                  |      |
      }|j                  |       y)z4Generate random hyperplanes and store in Yellowbrickr   r   r    zSELECT COUNT(*) FROM {t})r   Nz!SELECT MAX(embedding_id) FROM {t}r   a2  
            WITH parameters AS (
                SELECT {num_hyperplanes} AS num_hyperplanes,
                    {dims_per_hyperplane} AS dims_per_hyperplane
            )
            INSERT INTO {hyperplanes_table} (id, hyperplane_id, hyperplane)
                SELECT id, hyperplane_id, (random() * 2 - 1) AS hyperplane
                FROM
                (SELECT range-1 id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT num_hyperplanes FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) a,
                (SELECT range-1 hyperplane_id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT dims_per_hyperplane FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) b
        )num_hyperplanesdims_per_hyperplanehyperplanes_table)r>   r   rS   r   rT   rL   r   r   r   r   r   )	r*   r   r-  r   r   r/  r   num_dimensionsr
  s	            r"   _populate_hyperplanesz!Yellowbrick._populate_hyperplanes*  s1    +/<<R*CNN 

 KK$*C*CC
 	sww9:AADUAVW??Q!#CNN7M74;;7swwBCJJQJOP*1-!ww
$ &KK8 #N ;/  
% 	. 	|$r!   c                H   ddl m} | j                  r| j                  fnd} |j                  g || j                  | j
                  z    } |j                  g || j                  | j                  z    }|j                  | j                  | j
                  z   dz         }|j                  | j                  | j
                  z   dz         }|j                  |j                  d      j                  ||||             | j                  r| j                  fnd} |j                  g || j                  | j                  z    }|j                  | j                  | j                  z   dz         }	|j                  |j                  d	      j                  ||	
             y)z&Create LSH index and hyperplane tablesr   r   r    r   r   a  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                hash_index SMALLINT NOT NULL,
                hash SMALLINT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, hash_index),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            r   _pk_id_hp_ida2  
                CREATE TABLE IF NOT EXISTS {t} (
                id SMALLINT NOT NULL,
                hyperplane_id SMALLINT NOT NULL,
                hyperplane FLOAT NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (id, hyperplane_id))
                DISTRIBUTE REPLICATE SORT ON (id)
            r   N)r>   r   rS   r   rT   rK   rM   r   r   r   rL   )
r*   r   r   r   r   r   r   r   r   r   s
             r"   _create_lsh_index_tablesz$Yellowbrick._create_lsh_index_tablesT  sv    +/<<RS^^O]ODKK$:N:N,NOS^^M]MDKK$:L:L,LM^^DKK$*>*>>MN^^DKK$*>*>>MNGG
 f	  	
& ,0<<RCNNSMS4;;9R9R+RSNN4;;)B)BB^STGG	 f  	
r!   c                    | j                  | j                  | j                  | j                  z   |       | j                  | j                  | j                  | j                  z   |       y)zDrop LSH index tables)r5   r=   r   N)r7   rS   rT   rK   rL   )r*   r   s     r"   rY   z"Yellowbrick._drop_lsh_index_tables  s]    		<<t{{T5I5I'IRX 	 	
 			<<++ 9 99 	 	
r!   c                \   |j                   t        j                  j                  k(  ry| j                  j                         5 }| j                  |       | j                  |       | j                  ||j                  dd             | j                  |       ddd       yy# 1 sw Y   yxY w)z"Create index from existing vectorsr-     N)r(   r   r#   r   r   rX   rY   r4  r1  r0   r'  r*   r   r   s      r"   create_indexzYellowbrick.create_index  s    ""k&;&;&?&??++- 0++F3--f5**L223DcJ ''/0 0 @0 0s   AB""B+c                    |j                   t        j                  j                  k(  r5| j                  j                         5 }| j                  |       ddd       yy# 1 sw Y   yxY w)zDrop an indexN)r(   r   r#   r   r   rX   rY   r8  s      r"   
drop_indexzYellowbrick.drop_index  s\    ""k&;&;&?&??++- 4++F34 4 @4 4s   AA'c                    |j                   t        j                  j                  k(  r6| j                  j                         5 }| j                  ||       ddd       yy# 1 sw Y   yxY w)zHUpdate an index with a new or modified embedding in the embeddings tableN)r(   r   r#   r   r   rX   r'  )r*   r   r  r   s       r"   r   zYellowbrick._update_index  s`     ""k&;&;&?&??++- 8''78 8 @8 8s   AA(c                   ddl m} 	 | j                  j                         5 }| j                  r| j                  fnd} |j
                  g || j                   } |j
                  g || j                  dz    } |j
                  g || j                  | j                  z    }|j                  d      j                  ||      }|j                  |       | j                  |       |j                  d      j                  ||      }|j                  |       |j                  d      j                  ||      }	|j                  |	       d d d        y # 1 sw Y   y xY w# t        $ r}
t        d	|
       |
d }
~
ww xY w)
Nr   r   r    _v1zALTER TABLE {t1} RENAME TO {t2})r   r   z
                    INSERT INTO {t1} (doc_id, embedding_id, embedding) 
                    SELECT id, embedding_id, embedding FROM {t2}
                z
                    INSERT INTO {t1} (doc_id, text, metadata) 
                    SELECT DISTINCT id, text, metadata FROM {t2}
                zFailed to migrate schema: )r>   r   r   rX   rS   r   rT   rM   r   r   r   r[   r   rw   )r*   r   r   r   r   old_embeddingscontentalter_table_queryr
  insert_content_queryry   s              r"   migrate_schema_v1_to_v2z#Yellowbrick.migrate_schema_v1_to_v2  s    #	H++-  537<<R+S^^H]HDKKH
!/!T!Te@S!T(#.. "$(KK$2D2D$D %(GG,M$N$U$U!% %V %! 01""6*"ww 
 &!%    |,'*ww(
 &G&7 % 34A 5  5  5B  	H!;A3?@aG	Hs5   E' D0EE' E$ E' $E' '	F0E??F)r\   r   rN   r2   r=   r2   r5   Optional[str]r6   zOptional[logging.Logger]r7   boolr3   r}   )r   
'PgCursor'r3   r}   r1   )r=   r2   r5   rD  r   zOptional['PgCursor']r3   r}   r'   )r   rF  r=   r2   r5   rD  r3   r}   )r3   rE  )r   zIterable[str]r   Optional[List[dict]]r   r   r3   	List[str])r   rF  r   r   r   r   r3   r}   )Nr  	langchainpublicF)ri   zType[Yellowbrick]r   rH  r\   r   r   rG  rN   r2   r=   r2   r5   r2   r7   rE  r   r   r3   r   )r   zOptional[List[str]]r   zOptional[bool]r   r   r3   r}   )rJ  )r   rF  r   r2   r5   r2   r3   rE  )r   List[float]r3   	uuid.UUID)   )r\   rK  r  intr   r   r3   List[Tuple[Document, float]])r   r2   r  rN  r   r   r3   List[Document])r   r2   r  rN  r   r   r3   rO  )r\   rK  r  rN  r   r   r3   rP  )r   rF  r  zOptional[uuid.UUID]r3   r}   )r   rF  r)  r2   r  r2   r3   r}   )r   rF  r-  rN  r3   r}   )r   r%   r3   r}   )r   r%   r  rL  r3   r}   r|   )$r   r   r   r   r2   enumEnumr#   r4   r+   rO   rZ   r[   r7   r   rW   r   r   classmethodr   r   r   r   r  r  r  r  r'  r  r1  r4  rY   r9  r;  r   rC  r    r!   r"   r   r   !   su   C 1 1, !%+/<'<' <' 	<' <' )<' <' 
<'|4# 4#l"1
l !%'+	;; ; %	;
 
;( !%	)) ) 	)
 
).. +/55 (5 	5
 
5nA A.6AGOA	A8 
 +/!#   (	
      
 @ $(%); ; #; 	;
 
;| BJ( (.1(;>(	(0 01V$V),V<?V	%Vr $%-- -03-	-, $% 03	%, 01-$-),-<?-	-. '++$+$ $+$ 
	+$Z"$ "$7:"$LO"$	"$H(%T-
^	
	04838=F8	8&Hr!   r   )"
__future__r   rP   r   rQ  r   r@   r   
contextlibr   ior   typingr   r   r   r	   r
   r   r   r   r   langchain_core.embeddingsr   langchain_core.vectorstoresr   %langchain_community.docstore.documentr   psycopg2.extensionsr   PgConnectionr   PgCursorr   r    r!   r"   <module>r^     sS    "  
     % 
 
 
 1 3 :>6lH+ lHr!   