
    h|a                        d dl mZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d	Zdd
ZddZ G d de      Zy)    )annotationsN)AnyCallableDictIterableListOptionalType)Document)
Embeddingsguard_import)VectorStore)maximal_marginal_relevance   c                     t        d      S )zImport lancedb package.lancedbr        f/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/vectorstores/lancedb.pyimport_lancedbr      s    	""r   c           
     ~    dj                  | j                         D cg c]  \  }}| d| d c}}      S c c}}w )z2Converts a dict filter to a LanceDB filter string.z AND z = '')joinitems)filterkvs      r   to_lance_filterr      s5    <<FLLNCDAqA3d1#QCDDCs   9
c                     e Zd ZdZdddddddddddd	ddef	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd
ZdddZedd       Z	 	 d 	 	 	 	 	 	 	 	 	 d!dZ		 d"	 	 	 	 	 d#dZ
	 	 	 	 	 	 	 d$	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d%dZd&dZ	 	 d 	 	 	 	 	 	 	 	 	 d'dZ	 	 	 d(	 	 	 	 	 	 	 	 	 	 	 d)dZd*dZ	 	 	 d(	 	 	 	 	 	 	 	 	 	 	 d+dZ	 	 	 d(	 	 	 	 	 	 	 	 	 	 	 d+dZ	 	 d 	 	 	 	 	 	 	 	 	 d,dZ	 	 	 	 d-	 	 	 	 	 	 	 	 	 	 	 	 	 d.dZ	 	 	 	 d/	 	 	 	 	 	 	 	 	 	 	 	 	 d0dZ	 	 	 	 d/	 	 	 	 	 	 	 	 	 	 	 	 	 d1dZe	 	 	 	 	 	 	 	 	 	 	 	 d2	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d3d       Z	 	 	 	 	 d4	 	 	 	 	 	 	 	 	 	 	 	 	 d5dZy)6LanceDBay  `LanceDB` vector store.

    To use, you should have ``lancedb`` python package installed.
    You can install it with ``pip install lancedb``.

    Args:
        connection: LanceDB connection to use. If not provided, a new connection
                    will be created.
        embedding: Embedding to use for the vectorstore.
        vector_key: Key to use for the vector in the database. Defaults to ``vector``.
        id_key: Key to use for the id in the database. Defaults to ``id``.
        text_key: Key to use for the text in the database. Defaults to ``text``.
        table_name: Name of the table to use. Defaults to ``vectorstore``.
        api_key: API key to use for LanceDB cloud database.
        region: Region to use for LanceDB cloud database.
        mode: Mode to use for adding data to the table. Valid values are
              ``append`` and ``overwrite``. Defaults to ``overwrite``.



    Example:
        .. code-block:: python
            vectorstore = LanceDB(uri='/lancedb', embedding_function)
            vectorstore.add_texts(['text1', 'text2'])
            result = vectorstore.similarity_search('text1')
    Nz/tmp/lancedbvectoridtextvectorstore	overwritel2c                `   t        d      }t        d      |j                  _        || _        || _        || _        || _        |dk7  r|xs t        j                  d      nd| _	        |	| _
        |
| _        || _        || _        || _        d| _        t!        ||j"                  j$                        r|| _        n|d| _        nt)        d      t!        |t*              r(| j                  |j-                  d      rt)        d      | j                  t)        d	      t!        ||j.                  j0                        r|| _        nt!        |t*        |j.                  j4                  f      rt)        d
      | j                  |j7                  |      | _        nzt!        |t*              rj|j-                  d      r.|j7                  || j                  | j                        | _        n+|j7                  |      | _        t9        j:                  d       |j	 t!        ||j.                  j4                  |j                  j                  j<                  f      sJ || _        tA        |d      r|jB                  nd| _"        y| jI                  |d      | _        y# tF        $ r t)        d      w xY w)z$Initialize with Lance DB vectorstorer   zlancedb.remote.table LANCE_API_KEYNz9`reranker` has to be a lancedb.rerankers.Reranker object.zdb://z&API key is required for LanceDB cloud.z#embedding object should be providedzs`connection` has to be a lancedb.db.LanceDBConnection object.                `lancedb.db.LanceTable` is deprecated.)api_keyregionz[api key provided with local uri.                            The data will be stored locallynameremote_tablezj`table` has to be a lancedb.db.LanceTable or 
                    lancedb.remote.table.RemoteTable object.T)set_default)%r   remotetable
_embedding_vector_key_id_key	_text_keyosgetenvr+   r,   modedistanceoverride_relevance_score_fnlimit
_fts_index
isinstance	rerankersReranker	_reranker
ValueErrorstr
startswithdbLanceDBConnection_connection
LanceTableconnectwarningswarnRemoteTable_tablehasattrr-   _table_nameAssertionError	get_table)self
connection	embeddinguri
vector_keyid_keytext_key
table_namer+   r,   r8   r1   r9   rerankerrelevance_score_fnr;   r   s                    r   __init__zLanceDB.__init__:   sf   & y)+,BC#%!@G2w<"))O"<SW	 +=(
h 1 1 : :;%DN!DNK  c3DLL$8~~g& !IJJ??"BCCj'**">">?)D
S'***?*?$@A8 
 ||##*??3#7 c3'~~g.+2??dkk ,; ,( ,3??3+?( = !GJJ117>>3G3G3S3ST   $")%"8EJJn   ...FDK "  @ s   A(J J-c                6   |j                   j                  }d|v rd}n	d|v rd}nd }d|v }||sct        t        |            D cg c]E  }t	        || j
                     |   j                         |r|d   |   j                         ni       G c}S |r||ryt        t        |            D cg c][  }t	        || j
                     |   j                         |r|d   |   j                         ni       ||   |   j                         f] c}S y y c c}w c c}w )N	_distance_relevance_scoremetadata)page_contentr_   )schemanamesrangelenr   r5   as_py)rQ   resultsscorecolumns	score_colhas_metadataidxs          r   results_to_docszLanceDB.results_to_docs   s>   ..&&'!#I7**II!W,E !W.
 	 !(!8!=!C!C!EAMWZ05;;=SU  5 !W.  %,T^^%<S%A%G%G%I' ")!4S!9!?!?!A	 I&s+113  !Ys   A
D,A Dc                    | j                   S )N)r2   rQ   s    r   
embeddingszLanceDB.embeddings   s    r   c                   g }|xs+ |D cg c]  }t        t        j                               ! c}}| j                  j	                  t        |            }t        |      D ]R  \  }}	||   }
|r||   nd||   i}|j                  | j                  |
| j                  ||   | j                  |	d|i       T | j                         }|/| j                  j                  | j                  |      }|| _        n;| j                   |j#                  || j$                         n|j#                  |       d| _        |S c c}w )a  Turn texts into embedding and add it to the database

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            ids: Optional list of ids to associate with the texts.

        Returns:
            List of ids of the added texts.
        r#   r_   Ndata)r8   )rB   uuiduuid4r2   embed_documentslist	enumerateappendr3   r4   r5   rP   rF   create_tablerN   rL   r+   addr8   r<   )rQ   texts	metadatasidskwargsdocs_ro   rk   r$   rS   r_   tbls                r   	add_textszLanceDB.add_texts   s$   $ 77Ac$**,'7__44T%[A
"5) 
	IC"3I)2y~s3x8HHKK$$iLL#c(NND	
	 nn;""//0@0@t/LCDK||#499-
7 8s   $D;c                    ||r|| _         | j                   }n|}n| j                   }	 | j                  j                  |      S # t        $ r Y yw xY w)a  
        Fetches a table object from the database.

        Args:
            name (str, optional): The name of the table to fetch. Defaults to None
                                    and fetches current table object.
            set_default (bool, optional): Sets fetched table as the default table.
                                        Defaults to False.

        Returns:
            Any: The fetched table object.

        Raises:
            ValueError: If the specified table is not found in the database.

        N)rN   rF   
open_table	Exception)rQ   r-   r/   _names       r   rP   zLanceDB.get_table   s^    & #' (($$E	##..u55 		s   A 	AAc                    | j                  |      }|r|j                  |||||       y|r|j                  |       yt        d      )aO  
        Create a scalar(for non-vector cols) or a vector index on a table.
        Make sure your vector column has enough data before creating an index on it.

        Args:
            vector_col: Provide if you want to create index on a vector column.
            col_name: Provide if you want to create index on a non-vector column.
            metric: Provide the metric to use for vector index. Defaults to 'L2'
                    choice of metrics: 'L2', 'dot', 'cosine'
            num_partitions: Number of partitions to use for the index. Defaults to 256.
            num_sub_vectors: Number of sub-vectors to use for the index. Defaults to 96.
            index_cache_size: Size of the index cache. Defaults to None.
            name: Name of the table to create index on. Defaults to None.

        Returns:
            None
        )metricvector_column_namenum_partitionsnum_sub_vectorsindex_cache_sizez%Provide either vector_col or col_nameN)rP   create_indexcreate_scalar_indexrA   )	rQ   col_name
vector_colr   r   r   r   r-   r   s	            r   r   zLanceDB.create_index	  sV    6 nnT"#-- /!1   ##H-DEEr   c                    t        |d      5 }t        j                  |j                               j	                  d      cddd       S # 1 sw Y   yxY w)z!Get base64 string from image URI.rbzutf-8N)openbase64	b64encodereaddecode)rQ   rT   
image_files      r   encode_imagezLanceDB.encode_image3  sF    #t_ 	G
##JOO$56==gF	G 	G 	Gs   2A		Ac                   | j                         }|D cg c]  }| j                  |       }}|*|D cg c]  }t        t        j                               ! }}d}	| j
                  3t        | j
                  d      r| j
                  j                  |      }	nt        d      g }
t        |	      D ]P  \  }}|r||   nd||   i}|
j                  | j                  || j                  ||   | j                  ||   d|i       R |0| j                  j                  | j                   |
      }|| _        |S |j%                  |
       |S c c}w c c}w )	as  Run more images through the embeddings and add to the vectorstore.

        Args:
            uris List[str]: File path to the image.
            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
            ids (Optional[List[str]], optional): Optional list of IDs.

        Returns:
            List[str]: List of IDs of the added images.
        )rT   Nembed_image)uriszEembedding object should be provided and must have embed_image method.r#   r_   rq   )rP   r   rB   rs   rt   r2   rM   r   rA   rw   rx   r3   r4   r5   rF   ry   rN   rL   rz   )rQ   r   r|   r}   r~   r   rT   	b64_textsr   ro   rr   rk   embr_   s                 r   
add_imageszLanceDB.add_images8  s]   " nn <@@CT&&3&/@	@;.233tzz|$3C3
??&74??M+R44$4?JW  !*- 		HC)2y~s3x8HHKK$$cLL#c(NNIcN			 ;""//0@0@t/LCDK 
 GGDM
= A 4s
   E$Ec                   || j                   }| j                  |      }t        |t              rt	        |      }|j                  dd      }|j                  dd      }|j                  d      x}	rM|j                  || j                        j                  |      j                  |	      j                  ||      }
n=|j                  || j                        j                  |      j                  ||      }
|dk(  r(| j                  |
j                  | j                  	       |
j                         }t        |      d
k(  rt        j                  d       |S )N	prefilterF
query_typer"   metrics)queryr   )r   hybrid)rY   r   zNo results found for the query.)r;   rP   r=   dictr   getsearchr3   r   wherer@   rerankto_arrowrd   rI   rJ   )rQ   r   r   r   r-   r~   r   r   r   r   lance_queryr   s               r   _queryzLanceDB._queryl  s*    9

AnnT"fd#$V,FJJ{E2	ZZh7
jj++7+

4;K;K
Lqv3	  

4;K;K
Lqv3 
 !dnn&@7##%t9>MM;<r   c                   | j                   r| j                   S | j                  dk(  r| j                  S | j                  dk(  r| j                  S | j                  dk(  r| j                  S t        d| j                   d      )a8  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        cosiner'   ipzANo supported normalization function for distance metric of type: z=.Consider providing relevance_score_fn to Chroma constructor.)r:   r9   _cosine_relevance_score_fn_euclidean_relevance_score_fn%_max_inner_product_relevance_score_fnrA   rn   s    r   _select_relevance_score_fnz"LanceDB._select_relevance_score_fn  s     ++333==H$222]]d"555]]d"===115 @OO r   c                    || j                   } | j                  ||f||d|}| j                  ||j                  dd            S )zD
        Return documents most similar to the query vector.
        r   r-   rg   Frg   )r;   r   rl   pop)rQ   rS   r   r   r-   r~   ress          r   similarity_search_by_vectorz#LanceDB.similarity_search_by_vector  sQ     9

Adkk)QKvDKFK##Cvzz'5/I#JJr   c           
         || j                   }| j                         } | j                  ||fddi|}|D 	cg c]  \  }}	| |t        |	            f c}	}S c c}	}w )zZ
        Return documents most similar to the query vector with relevance scores.
        rg   T)r;   r   r   float)
rQ   rS   r   r   r-   r~   rZ   docs_and_scoresdocrg   s
             r   1similarity_search_by_vector_with_relevance_scoresz9LanceDB.similarity_search_by_vector_with_relevance_scores  s|     9

A!<<>:$::q
 $
(.
 GV
8BUS$U5\23
 	
 
s   Ac                   || j                   }|j                  dd      }|j                  dd      }|j                  dd      }| j                  t        d      |dk(  s|d	k(  r| j                  | j
                  | j                  |      }|j                  | j                  d
      | _        |d	k(  r | j                  j                  |      }	|	|f}
n|}
 | j                  |
|f||d|}| j                  ||      S t        d      | j                  j                  |      }	 | j                  |	|fd|i|}| j                  ||      S )zAReturn documents most similar to the query with relevance scores.Nrg   Tr-   r   r"   z4search needs an emmbedding function to be specified.ftsr   )replacer   r   z?Full text/ Hybrid search is not supported in LanceDB Cloud yet.r   )r;   r   r2   rA   r+   r<   rP   create_fts_indexr5   embed_queryr   rl   NotImplementedError)rQ   r   r   r   r~   rg   r-   r   r   rS   r   r   s               r   similarity_search_with_scorez$LanceDB.similarity_search_with_score  sZ    9

A

7D)zz&$'ZZh7
??"STT*"8||#(?nnT*"%"6"6t~~t"6"T) $ ; ;E BI'/F"F!dkk&!PFPP++Cu+==)U  33E:I$++iD6DVDC''5'99r   c           
     8     | j                   d|||||dd|}|S )ap  Return documents most similar to the query

        Args:
            query: String to query the vectorstore with.
            k: Number of documents to return.
            filter (Optional[Dict]): Optional filter arguments
                sql_filter(Optional[string]): SQL filter to apply to the query.
                prefilter(Optional[bool]): Whether to apply the filter prior
                                             to the vector search.
        Raises:
            ValueError: If the specified table is not found in the database.

        Returns:
            List of documents most similar to the query.
        F)r   r   r-   r   r   rg   r   )r   )rQ   r   r   r-   r   r   r~   r   s           r   similarity_searchzLanceDB.similarity_search  s7    0 0d// 
14Cu
PV
 
r   c                    || j                   }| j                  t        d      | j                  j                  |      }| j	                  |||||      }|S )a?  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        zBFor MMR search, you must specify an embedding function oncreation.)lambda_multr   )r;   r2   rA   r   'max_marginal_relevance_search_by_vector)	rQ   r   r   fetch_kr   r   r~   rS   r   s	            r   max_marginal_relevance_searchz%LanceDB.max_marginal_relevance_search  sm    4 9

A??"T  OO//6	;;# < 
 r   c                F    | j                   d|||d|}t        t        j                  |t        j                        |d   j                         |xs | j                  |      }| j                  |      }	t        |	      D 
cg c]  \  }
}|
|v s| }}
}|S c c}}
w )aH  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        )r   r   r   )dtyper"   )r   r   r   )	r   r   nparrayfloat32	to_pylistr;   rl   rw   )rQ   rS   r   r   r   r   r~   rf   mmr_selected
candidatesirselected_resultss                r   r   z/LanceDB.max_marginal_relevance_search_by_vectorC  s    6 $++ 

 	
 2HHYbjj1H'')o4::#	
 ))'2
*3J*?U$!Q1CTAUU Vs   BBc                \    t        d|||||||	|
||||d|}|j                  ||       |S )N)rR   rS   rU   rV   rW   rX   r+   r,   r8   r9   rY   rZ   )r|   r   )r!   r   )clsr{   rS   r|   rR   rU   rV   rW   rX   r+   r,   r8   r9   rY   rZ   r~   instances                    r   
from_textszLanceDB.from_textsp  s[    &  
!!!1
 
 	5I6r   c                ^   | j                  |      }|r|j                  |       y|r=|j                  | j                   dj                  dj	                  |                   y|r)| j
                  t        d      |j                  |       y|r|j                  d       yt        d      )a  
        Allows deleting rows by filtering, by ids or drop columns from the table.

        Args:
            filter: Provide a string SQL expression -  "{col} {operation} {value}".
            ids: Provide list of ids to delete from the table.
            drop_columns: Provide list of columns to drop from the table.
            delete_all: If True, delete all rows from the table.
        z
 in ('{}'),Nz;Column operations currently not supported in LanceDB Cloud.truez6Provide either filter, ids, drop_columns or delete_all)	rP   deleter4   formatr   r+   r   drop_columnsrA   )rQ   r}   
delete_allr   r   r-   r~   r   s           r   r   zLanceDB.delete  s    $ nnT"JJvJJ$,,|4;;CHHSMJK||')Q    .JJvUVVr   )rR   Optional[Any]rS   Optional[Embeddings]rT   Optional[str]rU   r   rV   r   rW   r   rX   r   r+   r   r,   r   r8   r   r1   r   r9   r   rY   r   rZ   "Optional[Callable[[float], float]]r;   int)F)rf   r   rg   boolreturnr   )r   r   )NN)
r{   zIterable[str]r|   Optional[List[dict]]r}   Optional[List[str]]r~   r   r   	List[str])NF)r-   r   r/   Optional[bool]r   r   )NN   `   NL2N)r   r   r   r   r   Optional[int]r   r   r   r   r   r   r-   r   r   None)rT   rB   r   rB   )
r   r   r|   r   r}   r   r~   r   r   r   )NNN)r   r   r   r   r   r   r-   r   r~   r   r   r   )r   zCallable[[float], float])rS   List[float]r   r   r   Optional[Dict[str, str]]r-   r   r~   r   r   r   )
r   rB   r   r   r   r   r~   r   r   r   )NNNF)r   rB   r   r   r-   r   r   r   r   r   r~   r   r   List[Document])N   g      ?N)r   rB   r   r   r   r   r   r   r   r   r~   r   r   r   )rS   r   r   r   r   r   r   r   r   r   r~   r   r   r   )NNr"   r#   r$   r%   NNr&   r'   NN)"r   zType[LanceDB]r{   r   rS   r   r|   r   rR   r   rU   r   rV   r   rW   r   rX   r   r+   r   r,   r   r8   r   r9   r   rY   r   rZ   r   r~   r   r   r!   )NNNNN)r}   r   r   r   r   r   r   r   r-   r   r~   r   r   r   )__name__
__module____qualname____doc__	DEFAULT_Kr[   rl   propertyro   r   rP   r   r   r   r   r   r   r   r   r   r   r   classmethodr   r   r   r   r   r!   r!      s   : %)*.+$, $"($1!% $)#"&"&AE!VG!VG (VG 	VG
 "VG VG  VG "VG VG VG VG VG  VG  VG ?VG  !VGp D   +/#'	.. (. !	.
 . 
.b IN!7E	F #'$((+)+*. $"(F(F "(F &	(F
 '(F ((F (F (F 
(FTG +/#'	22 (2 !	2
 2 
2n   $"$$ $ 	$
 $ $ 
$L8  +/"KK K )	K
 K K 
K(  +/"

 
 )	

 
 
 

2  +/	&:&: &: )	&:
 &: 
&:V  " $#  	
    
@   +/** * 	*
 * )* * 
*^   +/+ +  +  	+ 
 +  )+  +  
+ Z 
 +/$($, $"($1!% $)"&"&AE### # (	#
 "# "# #  # "# # # #  #  # ?#  !#" 
## #N $(%) $,0"!W !W #!W 	!W
 *!W !W !W 
!Wr   r!   )r   r   )r   zDict[str, str]r   rB   )
__future__r   r   r6   rs   rI   typingr   r   r   r   r   r	   r
   numpyr   langchain_core.documentsr   langchain_core.embeddingsr   langchain_core.utilsr   langchain_core.vectorstoresr   &langchain_community.vectorstores.utilsr   r   r   r   r!   r   r   r   <module>r     sO    "  	   F F F  - 0 - 3 M	#
E
Y
Wk Y
Wr   