
    h                        d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlZd dlmZ ddlmZ dd	lmZmZmZ dd
lmZmZmZmZ ddlmZ h dZ e       r
d dlZd dlmZ  e       rd dlZ ej@                  e!      Z"d Z#d Z$d Z%d Z&d Z'defdZ(	 	 	 	 d#dZ) G d de      Z* G d de*      Z+ G d de*      Z, G d d      Z- G d d       Z. G d! d"      Z/y)$    N)partial)Pool	cpu_count)
ThreadPool)Optional)tqdm   )whitespace_tokenize)BatchEncodingPreTrainedTokenizerBaseTruncationStrategy)is_tf_availableis_torch_availableis_torch_hpu_availablelogging   )DataProcessor>   bartmpnetroberta	camembert)TensorDatasetc                     dj                  |j                  |            }t        ||dz         D ];  }t        ||dz
  d      D ]&  }dj                  | ||dz          }||k(  s ||fc c S  = ||fS )zFReturns tokenized answer spans that better match the annotated answer. r   )jointokenizerange)	
doc_tokensinput_start	input_end	tokenizerorig_answer_texttok_answer_text	new_startnew_end	text_spans	            `/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/data/processors/squad.py_improve_answer_spanr)   -   s    hhy112BCDO;	A6 ,	Y	Ar: 	,GI1!FGIO+!7++	,, ##    c                    d}d}t        |       D ]s  \  }}|j                  |j                  z   dz
  }||j                  k  r2||kD  r8||j                  z
  }||z
  }	t        ||	      d|j                  z  z   }
||
|kD  sp|
}|}u ||k(  S ):Check if this is the 'max context' doc span for the token.Nr   {Gz?)	enumeratestartlengthmin	doc_spanscur_span_indexposition
best_scorebest_span_index
span_indexdoc_spanendnum_left_contextnum_right_contextscores              r(   _check_is_max_contextr>   :   s    JO )) 4 )
Hnnx.2hnn$c>#hnn4(N$&784(//;QQ!3J(O) _,,r*   c                     d}d}t        |       D ]P  \  }}|d   |d   z   dz
  }||d   k  r||kD  r#||d   z
  }||z
  }	t        ||	      d|d   z  z   }
||
|kD  sM|
}|}R ||k(  S )r,   Nr/   r0   r   r-   )r.   r1   r2   s              r(   _new_check_is_max_contextr@   N   s     JO )) 4 )
Hw(8"44q8hw''c>#hw&77(N$&784(8BT;TT!3J(O) _,,r*   c                 J    | dk(  s| dk(  s| dk(  s| dk(  st        |       dk(  ryy)Nr   	
i/   TF)ord)cs    r(   _is_whitespacerG   d   s,    Cx19T	Q$Y#a&F:Jr*   c                 r   g }|r| j                   s| j                  }| j                  }dj                  | j                  ||dz          }	dj                  t        | j                              }
|	j                  |
      dk(  rt        j                  d|	 d|
 d       g S g }g }g }t        | j                        D ]  \  }}|j                  t        |             t        j                  j                  dv rt        j!                  |d	      }nt        j!                  |      }|D ]$  }|j                  |       |j                  |       &  |r| j                   sx|| j                     }| j                  t        | j                        dz
  k  r|| j                  dz      dz
  }nt        |      dz
  }t#        |||t        | j                        \  }}g }t        j%                  | j&                  d
d|      }t)        t              j                  j+                  dd      j-                         }|t.        v r$t        j0                  t        j2                  z
  dz   n t        j0                  t        j2                  z
  }t        j0                  t        j4                  z
  }|}t        |      |z  t        |      k  r9t        j6                  dk(  r|}|}t8        j:                  j<                  }n|}|}t8        j>                  j<                  }t        jA                  |||||d||z
  t        |      z
  |z
  d      }tC        t        |      t        |      |z  z
  |t        |      z
  |z
        }t        jD                  |d   v rt        j6                  dk(  r)|d   d |d   jG                  t        jD                         }nKt        |d         dz
  |d   d d d   jG                  t        jD                        z
  } |d   | dz   d  }n|d   }t        jI                  |      }!i }"tK        |      D ]?  }t        j6                  dk(  rt        |      |z   |z   n|}#|t        |      |z  |z      |"|#<   A ||d<   |!|d<   |"|d<   t        |      |z   |d<   i |d<   t        |      |z  |d<   ||d<   |j                  |       d|vsd|v rt        |d         dk(  rn!|d   }t        |      |z  t        |      k  r9tK        t        |            D ]V  }$tK        ||$   d         D ]@  }%tM        ||$|$|z  |%z         }&t        j6                  dk(  r|%n
||$   d   |%z   }#|&||$   d   |#<   B X |D ]  }'|'d   jG                  t        jN                        }(tQ        jR                  |'d         })t        j6                  dk(  rd|)t        |      |z   d  nd|)t        |'d          t        |      |z     tQ        jT                  tQ        jV                  |'d   t        jD                  k(              }*tQ        jX                  t        j[                  |'d   d            j]                         }+d|)|*<   d|)|+<   d|)|(<   | j                   },d}d}|r`|,s^|'d   }-|'d   |'d   z   dz
  }.d
}/|-k\  r|.k  sd}/|/r|(}|(}d},n4t        j6                  dk(  rd}0nt        |      |z   }0||-z
  |0z   }|-z
  |0z   }|j                  t_        |'d   |'d   |'d   |(|)ja                         dd|'d   |'d   |'d   |'d   |||,| jb                                |S )Nr   r   r   zCould not find answer: 'z' vs. '')RobertaTokenizerLongformerTokenizerBartTokenizerRobertaTokenizerFastLongformerTokenizerFastBartTokenizerFastT)add_prefix_spaceF)add_special_tokens
truncation
max_length	Tokenizer right)rR   paddingrS   return_overflowing_tokensstridereturn_token_type_ids	input_idsparagraph_lentokenstoken_to_orig_map*truncated_query_with_special_tokens_lengthtoken_is_max_contextr/   r0   overflowing_tokensr   lefttoken_type_ids)already_has_special_tokensattention_mask)
example_index	unique_idr\   r`   r]   r^   start_positionend_positionis_impossibleqas_id)2rj   rh   ri   r   r   r
   answer_textfindloggerwarningr.   appendlenr"   	__class____name__r   r)   encodequestion_texttypereplacelowerMULTI_SEP_TOKENS_TOKENIZERS_SETmodel_max_lengthmax_len_single_sentencemax_len_sentences_pairpadding_sider   ONLY_SECONDvalue
ONLY_FIRSTencode_plusr1   pad_token_idindexconvert_ids_to_tokensr   r@   cls_token_idnp	ones_likewhere
atleast_1dasarrayget_special_tokens_masknonzeroSquadFeaturestolistrk   )1examplemax_seq_length
doc_stridemax_query_lengthpadding_strategyis_trainingfeaturesrh   ri   actual_textcleaned_answer_texttok_to_orig_indexorig_to_tok_indexall_doc_tokensitoken
sub_tokens	sub_tokentok_start_positiontok_end_positionspanstruncated_querytokenizer_typesequence_added_tokenssequence_pair_added_tokensspan_doc_tokenstextspairsrR   encoded_dictr\   non_padded_idslast_padding_id_positionr]   r^   r   doc_span_indexjis_max_contextspan	cls_indexp_maskpad_token_indicesspecial_token_indicesspan_is_impossible	doc_startdoc_endout_of_span
doc_offsets1                                                    r(   !squad_convert_example_to_featuresr   j   s    H700 //++ hhw11.LSTDTVW!hh':7;N;N'OP/0B6NN5k]'J]I^^_`aINg001 -5  ^!45'' ,
 
 #++ED+IJ"++E2J# 	-I$$Q'!!),	--" 700.w/E/EF#g&8&8"9A"==01E1E1IJQN">2Q61E.0@)WM`M`2
.	- E&&%DUe ' O )_--55k2FLLNN << 	""Y%F%FFJ'')*K*KK 
 "+!;!;i>^>^!^$O
e*z
!C$7
7!!W,#E#E+77==J#E#E+66<<J ,,!$%&*!J._1EEHbb"& - 	
 #e*z"99S114NN

 !!\+%>>%%0!-k!:;t\+=V=\=\]f]s]s=t!u [12Q6k9RSWUWSW9X9^9^_h_u_u9vv ) ".k!:;SVW;W;Y!Z *+6N00@}% 	VAHQH^H^biHiC(+@@1DopE'8Uj9PST9T'Ue$	V )6_%!'X,=()EHEY\qEqAB/1+, #E
Z 7W!.X\"|3 L0SFZ9[5\`a5a&';<y e*z
!C$7
7|  E
+ Ru^,_=> 	RA6unn_iNilmNmnN ))V3 >*+WX[\\ 
 DRE.!"89%@	RR  B
%++I,B,BC	 d#345!!W,EFF3'*??AB]^FCX''C,@CX,X*YZHHR]]43D	H^H^3^%_` "

--d;.?\`-a!

') 	 %& !()$% y$221 WI7md8n4q8GK&)38HG8S"!*(%)"))V3!"J!$_!58M!MJ!3i!?*!L/);jH[!%&%&"?3%)*@%AH~"&':";-)0~~	
aB
F Or*   tokenizer_for_convertc                     | a y N)r"   )r   s    r(   &squad_convert_example_to_features_initr   8  s    %Ir*   c
           
      ,    t        |t                     }t               rt        nt        }
 |
|t
        |f      5 }t        t        |||||      }t        t        |j                  || d      t        |       d|	              ddd       g }d}d	}t         t               d
|	       D ]5  }|s|D ]&  }||_        ||_        |j                  |       |dz  }( |dz  }7 | ~|dk(  r]t               st!        d      t#        j$                   D cg c]  }|j&                   c}t"        j(                        }t#        j$                   D cg c]  }|j*                   c}t"        j(                        }t#        j$                   D cg c]  }|j,                   c}t"        j(                        }t#        j$                   D cg c]  }|j.                   c}t"        j(                        }t#        j$                   D cg c]  }|j0                   c}t"        j2                        }t#        j$                   D cg c]  }|j4                   c}t"        j2                        }|sHt#        j6                  |j9                  d	      t"        j(                        }t;        ||||||      } |fS t#        j$                   D cg c]  }|j<                   c}t"        j(                        }t#        j$                   D cg c]  }|j>                   c}t"        j(                        }t;        ||||||||      } |fS |dk(  rtA               st!        d       fd}d|jB                  v rptD        jF                  tD        jF                  tD        jF                  tD        jH                  tD        jJ                  dtD        jH                  tD        jH                  tD        jH                  tD        jF                  tD        jF                  df}tE        jL                  dg      tE        jL                  dg      tE        jL                  dg      tE        jL                  g       tE        jL                  g       dtE        jL                  g       tE        jL                  g       tE        jL                  g       tE        jL                  dg      tE        jL                  g       df}nJtD        jF                  tD        jF                  tD        jH                  tD        jJ                  dtD        jH                  tD        jH                  tD        jH                  tD        jF                  tD        jF                  df}tE        jL                  dg      tE        jL                  dg      tE        jL                  g       tE        jL                  g       dtE        jL                  g       tE        jL                  g       tE        jL                  g       tE        jL                  dg      tE        jL                  g       df}tD        jN                  jP                  jS                  |||      S  S # 1 sw Y   xY wc c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w )a  
    Converts a list of examples into a list of features that can be directly given as input to a model. It is
    model-dependant and takes advantage of many of the tokenizer's features to create the model's inputs.

    Args:
        examples: list of [`~data.processors.squad.SquadExample`]
        tokenizer: an instance of a child of [`PreTrainedTokenizer`]
        max_seq_length: The maximum sequence length of the inputs.
        doc_stride: The stride used when the context is too large and is split across several features.
        max_query_length: The maximum length of the query.
        is_training: whether to create features for model evaluation or model training.
        padding_strategy: Default to "max_length". Which padding strategy to use
        return_dataset: Default False. Either 'pt' or 'tf'.
            if 'pt': returns a torch.data.TensorDataset, if 'tf': returns a tf.data.Dataset
        threads: multiple processing threads.


    Returns:
        list of [`~data.processors.squad.SquadFeatures`]

    Example:

    ```python
    processor = SquadV2Processor()
    examples = processor.get_dev_examples(data_dir)

    features = squad_convert_examples_to_features(
        examples=examples,
        tokenizer=tokenizer,
        max_seq_length=args.max_seq_length,
        doc_stride=args.doc_stride,
        max_query_length=args.max_query_length,
        is_training=not evaluate,
    )
    ```)initializerinitargs)r   r   r   r   r       )	chunksizez"convert squad examples to features)totaldescdisableNi ʚ;r   zadd example index and unique idr   ptz6PyTorch must be installed to return a PyTorch dataset.)dtypetfz<TensorFlow must be installed to return a TensorFlow dataset.c               3     K   t              D ]  \  } }|j                  b|j                  |j                  | |j                  d|j
                  |j                  |j                  |j                  |j                  df t|j                  |j                  |j                  | |j                  d|j
                  |j                  |j                  |j                  |j                  df  y w)Nr[   re   feature_indexrk   start_positionsend_positionsr   r   rj   r[   re   rc   r   rk   )
r.   rc   r[   re   rk   rh   ri   r   r   rj   )r   exr   s     r(   genz/squad_convert_examples_to_features.<locals>.gen  s     "8, !2$$, *,.0.?.?-.&(ii	 02/@/@-/__)+&(ii-/-=-= $ *,.0.?.?.0.?.?-.&(ii 02/@/@-/__)+&(ii-/-=-= %!s   C/C2rc   r   r   r   )*r1   r   r   r   r   r   r   r   listr   imaprq   rf   rg   rp   r   RuntimeErrortorchtensorr[   longre   rc   r   r   floatrj   arangesizer   rh   ri   r   model_input_namesr   int32int64stringTensorShapedataDatasetfrom_generator)!examplesr"   r   r   r   r   r   return_datasetthreadstqdm_enabledpool_clsp	annotate_new_featuresrg   rf   example_featuresexample_featurefall_input_idsall_attention_masksall_token_type_idsall_cls_index
all_p_maskall_is_impossibleall_feature_indexdatasetall_start_positionsall_end_positionsr   train_typestrain_shapesr   s!                                   @r(   "squad_convert_examples_to_featuresr   =  s/   ` '9;'G35z4H	''MYbXd	e 
ij-)!--#
	 y(b9(m9((	

$ LIM H,M[gWg 
  / 	O,9O)(1O%0NI		
 	
 H!#WXX 8%Daakk%DEJJW#llh+OA,<,<+OW\WaWab"\\X*N1+;+;*NV[V`V`a8%Daakk%DEJJW\\X">188">ekkR
!LL8)La!//)LTYT_T_` %]-?-?-B%** U#24FHY[hjtG"    #(,,(/SQ0@0@/S[`[e[e"f %h-Oann-OW\WaWa b##"#!!	G   	4	 ]^^"	J y::: "$&(hh&(hh%'XX ii (*xx%'XX!# hh%'XXK& "$!7&(nndV&<&(nndV&<%'^^B%7 nnR0 (*~~b'9%'^^B%7!#!3 nndV4%'^^B%7L$ !hh"((UWU]U]ikirirs')xx%'XX!# hh%'XX	K "$!7&(nndV&<%'^^B%7 nnR0	 (*~~b'9%'^^B%7!#!3 nndV4%'^^B%7L  ww--c;MMW
 
N &E+O*N%D">)L 0T-Os<   A
Y!Y.Y3Y8Y=
ZZZZ!Y+c                   >    e Zd ZdZdZdZddZddZd	dZd	dZ	d Z
y)
SquadProcessorz
    Processor for the SQuAD data set. overridden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
    version 2.0 of SQuAD, respectively.
    Nc           
      v   |sD|d   d   d   j                         j                  d      }|d   d   d   j                         }g }n\t        |d   d   |d   d         D cg c]5  \  }}|j                         |j                         j                  d      d7 }}}d }d }t        |d   j                         j                  d      |d   j                         j                  d      |d	   j                         j                  d      |||d
   j                         j                  d      |      S c c}}w )Nanswerstextr   utf-8answer_start)r  r   idquestioncontexttitle)rk   ru   context_textrl   start_position_characterr  r   )numpydecodezipSquadExample)selftensor_dictevaluateanswerr  r   r/   r   s           r(   _get_example_from_tensor_dictz,SquadProcessor._get_example_from_tensor_dict&  sB    +F3A6<<>EEgNF&y1.A!DJJLLG $'{9'=n'M{[dOeflOm#nE4 "'

8K8KG8TUG 
 FLt$**,33G<%j1779@@I$Y/557>>wG%1g&,,.55g>
 	
s   ":D5c                     |r|d   }n|d   }g }t        |      D ]$  }|j                  | j                  ||             & |S )av  
        Creates a list of [`~data.processors.squad.SquadExample`] using a TFDS dataset.

        Args:
            dataset: The tfds dataset loaded from *tensorflow_datasets.load("squad")*
            evaluate: Boolean specifying if in evaluation mode or in training mode

        Returns:
            List of SquadExample

        Examples:

        ```python
        >>> import tensorflow_datasets as tfds

        >>> dataset = tfds.load("squad")

        >>> training_examples = get_examples_from_dataset(dataset, evaluate=False)
        >>> evaluation_examples = get_examples_from_dataset(dataset, evaluate=True)
        ```
validationtrain)r  )r   rp   r  )r  r   r  r   r  s        r(   get_examples_from_datasetz(SquadProcessor.get_examples_from_dataset>  sY    , l+Gg&G= 	`KOOD>>{U]>^_	` r*   c                 *   |d}| j                   t        d      t        t        j                  j                  ||| j                   n|      dd      5 }t        j                  |      d   }ddd       | j                  d      S # 1 sw Y   xY w)	a  
        Returns the training examples from the data directory.

        Args:
            data_dir: Directory containing the data files used for training and evaluating.
            filename: None by default, specify this if the training file has a different name than the original one
                which is `train-v1.1.json` and `train-v2.0.json` for squad versions 1.1 and 2.0 respectively.

        NrU   NSquadProcessor should be instantiated via SquadV1Processor or SquadV2Processorrr   encodingr   r  )	
train_file
ValueErroropenospathr   jsonload_create_examplesr  data_dirfilenamereader
input_datas        r(   get_train_examplesz!SquadProcessor.get_train_examples_  s     H??"mnnGGLLh6F4??HUWZel
 	36*62J	3 $$Z99		3 	3   B		Bc                 *   |d}| j                   t        d      t        t        j                  j                  ||| j                   n|      dd      5 }t        j                  |      d   }ddd       | j                  d      S # 1 sw Y   xY w)	a  
        Returns the evaluation example from the data directory.

        Args:
            data_dir: Directory containing the data files used for training and evaluating.
            filename: None by default, specify this if the evaluation file has a different name than the original one
                which is `dev-v1.1.json` and `dev-v2.0.json` for squad versions 1.1 and 2.0 respectively.
        NrU   r  r  r   r  r   dev)	dev_filer  r  r  r  r   r  r   r!  r"  s        r(   get_dev_exampleszSquadProcessor.get_dev_examplesu  s     H== mnnGGLLH4D4==(SUXcj
 	36*62J	3 $$Z77		3 	3r(  c                 4   |dk(  }g }t        |      D ]  }|d   }|d   D ]s  }|d   }|d   D ]d  }	|	d   }
|	d   }d }d }g }|	j                  dd	      }|s|r|	d
   d   }|d   }|d   }n|	d
   }t        |
|||||||      }|j                  |       f u  |S )Nr  r  
paragraphsr  qasr  r  rj   Fr   r   r   r  )rk   ru   r  rl   r  r  rj   r   )r   getr  rp   )r  r&  set_typer   r   entryr  	paragraphr  qark   ru   r  rl   r   rj   r  r   s                     r(   r!  zSquadProcessor._create_examples  s    ')*% 	-E'NE"<0 -	(3#E* -BXF$&zNM/3,"&K G$&FF?E$BM(&%'	]1%5F*0.K7=n7M4&(mG*%&3%1$/1I#&3 '	G OOG,5--	-> r*   )Fr   )rs   
__module____qualname____doc__r  r+  r  r  r'  r,  r!   r*   r(   r   r     s-    
 JH
0B:,8*"r*   r   c                       e Zd ZdZdZy)SquadV1Processorztrain-v1.1.jsonzdev-v1.1.jsonNrs   r5  r6  r  r+  r8  r*   r(   r:  r:        "JHr*   r:  c                       e Zd ZdZdZy)SquadV2Processorztrain-v2.0.jsonzdev-v2.0.jsonNr;  r8  r*   r(   r>  r>    r<  r*   r>  c                       e Zd ZdZg dfdZy)r  aT  
    A single training/test example for the Squad dataset, as loaded from disk.

    Args:
        qas_id: The example's unique identifier
        question_text: The question string
        context_text: The context string
        answer_text: The answer string
        start_position_character: The character position of the start of the answer
        title: The title of the example
        answers: None by default, this is used during evaluation. Holds answers as well as their start positions.
        is_impossible: False by default, set to True if the example has no possible answer.
    Fc	                    || _         || _        || _        || _        || _        || _        || _        d\  | _        | _        g }	g }
d}| j                  D ]P  }t        |      rd}n#|r|	j                  |       n|	dxx   |z  cc<   d}|
j                  t        |	      dz
         R |	| _        |
| _        |=|s:|
|   | _        |
t        |t        |      z   dz
  t        |
      dz
           | _        y y y )N)r   r   Tr   Fr   )rk   ru   r  rl   r  rj   r   rh   ri   rG   rp   rq   r   char_to_word_offsetr1   )r  rk   ru   r  rl   r  r  r   rj   r   rA  prev_is_whitespacerF   s                r(   __init__zSquadExample.__init__  s     *(&
*15.T.
 ! "" 		<Aa %)"%%%a(rNa'N%*"&&s:':;		< %#6  $/"56N"OD 3,s;/??!CSI\E]`aEab!D 9F/r*   Nrs   r5  r6  r7  rC  r8  r*   r(   r  r    s    , -r*   r  c                   2    e Zd ZdZ	 	 ddee   dee   fdZy)r   a  
    Single squad example features to be fed to a model. Those features are model-specific and can be crafted from
    [`~data.processors.squad.SquadExample`] using the
    :method:*~transformers.data.processors.squad.squad_convert_examples_to_features* method.

    Args:
        input_ids: Indices of input sequence tokens in the vocabulary.
        attention_mask: Mask to avoid performing attention on padding token indices.
        token_type_ids: Segment token indices to indicate first and second portions of the inputs.
        cls_index: the index of the CLS token.
        p_mask: Mask identifying tokens that can be answers vs. tokens that cannot.
            Mask with 1 for tokens than cannot be in the answer and 0 for token that can be in an answer
        example_index: the index of the example
        unique_id: The unique Feature identifier
        paragraph_len: The length of the context
        token_is_max_context:
            List of booleans identifying which tokens have their maximum context in this feature object. If a token
            does not have their maximum context in this feature object, it means that another feature object has more
            information related to that token and should be prioritized over this feature for that token.
        tokens: list of tokens corresponding to the input ids
        token_to_orig_map: mapping between the tokens and the original text, needed in order to identify the answer.
        start_position: start of the answer token index
        end_position: end of the answer token index
        encoding: optionally store the BatchEncoding with the fast-tokenizer alignment methods.
    Nrk   r  c                     || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        || _        || _        || _        || _        || _        y r   )r[   re   rc   r   r   rf   rg   r\   r`   r]   r^   rh   ri   rj   rk   r  )r  r[   re   rc   r   r   rf   rg   r\   r`   r]   r^   rh   ri   rj   rk   r  s                    r(   rC  zSquadFeatures.__init__  s}    & #,,"*"*$8!!2,(* r*   )NN)rs   r5  r6  r7  r   strr   rC  r8  r*   r(   r   r     s2    T !%,0#%!  !%!" =)#%!r*   r   c                       e Zd ZdZddZy)SquadResultaJ  
    Constructs a SquadResult which can be used to evaluate a model's output on the SQuAD dataset.

    Args:
        unique_id: The unique identifier corresponding to that example.
        start_logits: The logits corresponding to the start of the answer
        end_logits: The logits corresponding to the end of the answer
    Nc                 ^    || _         || _        || _        |r|| _        || _        || _        y y r   )start_logits
end_logitsrg   start_top_indexend_top_index
cls_logits)r  rg   rK  rL  rM  rN  rO  s          r(   rC  zSquadResult.__init__E  s7    ($"#2D !.D(DO r*   )NNNrD  r8  r*   r(   rI  rI  ;  s    )r*   rI  )rS   Fr   T)0r  r  	functoolsr   multiprocessingr   r   multiprocessing.poolr   typingr   r  r   r   models.bert.tokenization_bertr
   tokenization_utils_baser   r   r   utilsr   r   r   r   r   ry   r   torch.utils.datar   
tensorflowr   
get_loggerrs   rn   r)   r>   r@   rG   r   r   r   r   r:  r>  r  r   rI  r8  r*   r(   <module>rZ     s     	  + +    @ a a Y Y   #L  .			H	%
$-(-,K\&BY & "]@O] Od~ 
~ 
< <~@! @!F) )r*   