
    hb                        d dl mZ d dlZd dlmZmZmZmZ d dlm	Z	m
Z
  G d de
      Z	 	 	 	 	 	 	 	 d
dZ G d d	e
      Zy)    )annotationsN)AnyLiteralOptionalUnion)LanguageTextSplitterc                  @     e Zd ZdZ	 	 d	 	 	 	 	 	 	 d fdZddZ xZS )CharacterTextSplitterz(Splitting text that looks at characters.c                @    t        |   di | || _        || _        y)Create a new TextSplitter.N )super__init__
_separator_is_separator_regex)self	separatoris_separator_regexkwargs	__class__s       `/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_text_splitters/character.pyr   zCharacterTextSplitter.__init__   s$     	"6"##5     c                F     j                   r j                  nt        j                   j                        }t	        || j
                        }d} j                   xr t         fd|D              }d} j
                  s|s j                  } j                  ||      S )z=Split into chunks without re-inserting lookaround separators.keep_separator)z(?=z(?<!z(?<=z(?!c              3  T   K   | ]  }j                   j                  |       ! y w)N)r   
startswith).0pr   s     r   	<genexpr>z3CharacterTextSplitter.split_text.<locals>.<genexpr>%   s%      9
./DOO&&q)9
s   %( )r   r   reescape_split_text_with_regex_keep_separatorany_merge_splits)r   textsep_patternsplitslookaround_prefixesis_lookaround	merge_seps   `      r   
split_textz CharacterTextSplitter.split_text   s      $77DOORYYt=W 	
 (+d.B.B

 =00 
S 9
3F9
 6
 	$$I !!&)44r   )

F)r   strr   boolr   r   returnNoner)   r1   r3   	list[str])__name__
__module____qualname____doc__r   r/   __classcell__r   s   @r   r   r   	   s>    2  #(	6	6 !	6 		6
 
	65r   r   c                  |r|rt        j                  d| d|       }|dk(  r3t        dt        |      dz
  d      D cg c]  }||   ||dz      z    c}n/t        dt        |      d      D cg c]  }||   ||dz      z    c}}t        |      dz  dk(  r||dd  z  }|dk(  r	g ||d   n|d   g|}n"t        j                  ||       }nt	        |       }|D cg c]
  }|dk7  s	| c}S c c}w c c}w c c}w )	N()endr         r"   )r#   splitrangelenlist)r)   r   r   _splitsir+   ss          r   r%   r%   4   s'    hh9+Q/6G "U* 8=QGq@PRS7TU!'!*wq1u~-U<A!S\ST<UVqwqzGAEN2V 
 7|a1$'"#,& "U* (6'72;'qz+F+  XXi.Fd)!bA)) VV *s   C//C4
C9(C9c                  ~     e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 	 	 d fdZd	dZd
dZe	 	 	 	 	 	 dd       Ze	dd       Z
 xZS )RecursiveCharacterTextSplitterzSplitting text by recursively look at characters.

    Recursively tries to split by different characters to find one
    that works.
    c                P    t        |   dd|i| |xs g d| _        || _        y)r   r   )r0   
 r"   Nr   )r   r   _separatorsr   )r   
separatorsr   r   r   r   s        r   r   z'RecursiveCharacterTextSplitter.__init__V   s0     	AA&A%@)@#5 r   c                   g }|d   }g }t        |      D ]S  \  }}| j                  r|nt        j                  |      }|dk(  r|} n$t        j                  ||      sI|}||dz   d } n | j                  r|nt        j                  |      }t        ||| j                        }	g }
| j                  rdn|}|	D ]  }| j                  |      | j                  k  r|
j                  |       3|
r%| j                  |
|      }|j                  |       g }
|s|j                  |       n| j                  ||      }|j                  |        |
r#| j                  |
|      }|j                  |       |S )z&Split incoming text and return chunks.rC   r"   rA   Nr   )	enumerater   r#   r$   searchr%   r&   _length_function_chunk_sizeappendr(   extend_split_text)r   r)   rQ   final_chunksr   new_separatorsrI   _sr   r+   _good_splitsrJ   merged_text
other_infos                 r   rY   z*RecursiveCharacterTextSplitter._split_textb   sz   rN	z* 	EAr#77RYYr]JRx	yyT*	!+AEG!4	 #'":":Y		)@T
'*T-A-A

 //RY
 	4A$$Q'$*:*::##A&"&"4"4\:"NK ''4#%L% ''*!%!1!1!^!DJ ''
3	4 ,,\:FK,r   c                :    | j                  || j                        S )zSplit the input text into smaller chunks based on predefined separators.

        Args:
            text (str): The input text to be split.

        Returns:
            List[str]: A list of text chunks obtained after splitting.
        )rY   rP   )r   r)   s     r   r/   z)RecursiveCharacterTextSplitter.split_text   s     d&6&677r   c                :    | j                  |      } | d|dd|S )a  Return an instance of this class based on a specific language.

        This method initializes the text splitter with language-specific separators.

        Args:
            language (Language): The language to configure the text splitter for.
            **kwargs (Any): Additional keyword arguments to customize the splitter.

        Returns:
            RecursiveCharacterTextSplitter: An instance of the text splitter configured
            for the specified language.
        T)rQ   r   r   )get_separators_for_language)clslanguager   rQ   s       r   from_languagez,RecursiveCharacterTextSplitter.from_language   s)      44X>
LjTLVLLr   c                p   | t         j                  t         j                  fv rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j
                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg d	S | t         j                  k(  rg d
S | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                  k(  rg dS | t         j                   k(  rg dS | t         j"                  k(  rg dS | t         j$                  k(  rg dS | t         j&                  k(  rg dS | t         j(                  k(  rg dS | t         j*                  k(  rg dS | t         j,                  k(  rg dS | t         j.                  k(  rg dS | t         j0                  k(  rg dS | t         j2                  k(  rg dS | t         j4                  k(  r*d}d| dd| dd| dd| dd| dd d!d"d#d$d%d&d'd(d)d*d+d,gS | t         j6                  v rd-|  d.}t9        |      d-|  d/t;        t                }t9        |      )0a
  Retrieve a list of separators specific to the given language.

        Args:
            language (Language): The language for which to get the separators.

        Returns:
            List[str]: A list of separators appropriate for the specified language.
        )
class z
void z
int z
float z
double 
if 
for 
while 
switch 
case r0   rN   rO   r"   )
func 
var 
const 
type rh   ri   rk   rl   r0   rN   rO   r"   )rg   
public 
protected 	
private 
static rh   ri   rj   rk   rl   r0   rN   rO   r"   )rg   rq   rr   rs   z

internal z
companion z
fun 
val rn   rh   ri   rj   z
when rl   
else r0   rN   rO   r"   )

function ro   
let rn   rg   rh   ri   rj   rk   rl   	
default r0   rN   rO   r"   )
enum 
interface z
namespace rp   rg   rw   ro   rx   rn   rh   ri   rj   rk   rl   ry   r0   rN   rO   r"   )rw   rg   rh   	
foreach rj   
do rk   rl   r0   rN   rO   r"   )
z	
message z	
service rz   z
option 
import z
syntax r0   rN   rO   r"   )rg   
def z
	def r0   rN   rO   r"   )z
=+
z
-+
z
\*+
z

.. *

r0   rN   rO   r"   )r   rg   rh   
unless rj   ri   r}   z
begin z
rescue r0   rN   rO   r"   )r   z
defp z
defmodule z
defprotocol z

defmacro z
defmacrop rh   r   rj   rl   z
cond z
with ri   r}   r0   rN   rO   r"   )z
fn ro   rx   rh   rj   ri   z
loop 
match ro   r0   rN   rO   r"   )rg   z
object r   ru   rn   rh   ri   rj   r   rl   r0   rN   rO   r"   )rm   rg   
struct rz   rh   ri   rj   r}   rk   rl   r0   rN   rO   r"   )	z
#{1,6} z```
z	
\*\*\*+
z
---+
z
___+
r0   rN   rO   r"   )z
\\chapter{z
\\section{z
\\subsection{z
\\subsubsection{z
\\begin{enumerate}z
\\begin{itemize}z
\\begin{description}z
\\begin{list}z
\\begin{quote}z
\\begin{quotation}z
\\begin{verse}z
\\begin{verbatim}z
\\begin{align}z$$$rO   r"   )z<bodyz<divz<pz<brz<liz<h1z<h2z<h3z<h4z<h5z<h6z<spanz<tablez<trz<tdz<thz<ulz<olz<headerz<footerz<navz<headz<stylez<scriptz<metaz<titler"   )r{   rz   z
implements z

delegate 
event rg   z

abstract rq   rr   rs   rt   z
return rh   z

continue ri   r|   rj   rk   z
break rl   rv   
try z
throw 	
finally 
catch r0   rN   rO   r"   )z
pragma z
using z

contract r{   z	
library z
constructor rp   rw   r   z

modifier z
error r   rz   rh   ri   rj   z

do while z

assembly r0   rN   rO   r"   )z
IDENTIFICATION DIVISION.z
ENVIRONMENT DIVISION.z
DATA DIVISION.z
PROCEDURE DIVISION.z
WORKING-STORAGE SECTION.z
LINKAGE SECTION.z
FILE SECTION.z
INPUT-OUTPUT SECTION.z
OPEN z
CLOSE z
READ z
WRITE z
IF z
ELSE z
MOVE z	
PERFORM z
UNTIL z	
VARYING z
ACCEPT z	
DISPLAY z

STOP RUN.rN   rO   r"   )
z
local rw   rh   ri   rj   z
repeat r0   rN   rO   r"   )z	
main :: z
main = rx   z
in r}   z
where 
:: z
= 
data z	
newtype rp   r   z
module r~   z
qualified z
import qualified rg   z

instance rl   z
| r   z
= {z
, r0   rN   rO   r"   )rw   z
param rh   r|   ri   rj   rk   rg   r   r   r   r0   rN   rO   r"   z*(?:Public|Private|Friend|Global|Static)\s+z\n(?!End\s)z?Sub\s+z?Function\s+z?Property\s+(?:Get|Let|Set)\s+z?Type\s+z?Enum\s+z\n(?!End\s)If\s+z\nElseIf\s+z	\nElse\s+z\nSelect\s+Case\s+z	\nCase\s+z\nFor\s+z\nDo\s+z
\nWhile\s+z	\nWith\s+z\n\nz\nrO   r"   z	Language z is not implemented yet!z& is not supported! Please choose from )r   CCPPGOJAVAKOTLINJSTSPHPPROTOPYTHONRSTRUBYELIXIRRUSTSCALASWIFTMARKDOWNLATEXHTMLCSHARPSOLCOBOLLUAHASKELL
POWERSHELLVISUALBASIC6_value2member_map_
ValueErrorrG   )rd   vismsgs      r   rb   z:RecursiveCharacterTextSplitter.get_separators_for_language   s(    

HLL11 ( x{{" " x}}$ ( x& 2 x{{" ( x{{" 2 x||# $ x~~% & x&
 
 x||#  x}}$ $ x& . x}}$ $ x~~% ( x~~% ( x((( & x~~% . x}}$ > x&# #H x||# : x~~% > x||#   x'''% %L x*** . x,,,?C se7+se<0se#ABse8,se8,#%+ 0 x222hZ'?@CS/!z!GXGWX 	 or   )NTF)
rQ   zOptional[list[str]]r   $Union[bool, Literal['start', 'end']]r   r2   r   r   r3   r4   )r)   r1   rQ   r6   r3   r6   r5   )rd   r   r   r   r3   rL   )rd   r   r3   r6   )r7   r8   r9   r:   r   rY   r/   classmethodre   staticmethodrb   r;   r<   s   @r   rL   rL   O   s     +/?C#(	
6'
6 =
6 !	
6
 
6 

6(T	8 MM+.M	'M M$ V	 V	r   rL   )r)   r1   r   r1   r   r   r3   r6   )
__future__r   r#   typingr   r   r   r   langchain_text_splitters.baser   r	   r   r%   rL   r   r   r   <module>r      sT    " 	 0 0 @(5L (5V*
**2V**6r
\ r
r   