
    hL                       d Z ddlZddlmZ ddlmZ ddlmZmZ ddl	Z	ddl
Z	ddl	mZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddlmZ ddlmZmZmZmZmZ ddlmZ ddlm Z m!Z!m"Z" ddl#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z,  e%jZ                  e.      Z/dZ0e e$d       G d de                    Z1e e$d       G d de                    Z2 G d dejf                        Z4 G d dejf                        Z5 G d d ejf                        Z6 G d! d"ejf                        Z7 G d# d$ejf                        Z8 G d% d&ejf                        Z9 G d' d(ejf                        Z: G d) d*ejf                        Z; G d+ d,ejf                        Z< G d- d.ejf                        Z=d/e=iZ> G d0 d1ejf                        Z? G d2 d3ejf                        Z@ G d4 d5e      ZA G d6 d7ejf                        ZB G d8 d9ejf                        ZCdWd:ZDe$ G d; d<e             ZE G d= d>eE      ZF e$d?       G d@ dAeE             ZG e$dB       G dC dDeE             ZH G dE dFejf                        ZI G dG dHejf                        ZJ G dI dJejf                        ZK e$dK       G dL dMeE             ZL e$dN       G dO dPeE             ZM G dQ dRejf                        ZN e$dS       G dT dUeE             ZOg dVZPy)XzPyTorch BridgeTower Model    N)OrderedDict)	dataclass)OptionalUnion)nn)CrossEntropyLoss   )ACT2FNQuickGELUActivation)CacheDynamicCacheEncoderDecoderCache)GradientCheckpointingLayer))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputModelOutputSequenceClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging	torch_int)deprecate_kwarg   )BridgeTowerConfigBridgeTowerTextConfigBridgeTowerVisionConfigRobertaTokenizerz.
    Output type of [`BridgeTowerModel`].
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   dZeeej                        ed<   y)BridgeTowerModelOutputa  
    text_features (`torch.FloatTensor` of shape `(batch_size, text_sequence_length, hidden_size)`):
        Sequence of hidden-states at the text output of the last layer of the model.
    image_features (`torch.FloatTensor` of shape `(batch_size, image_sequence_length, hidden_size)`):
        Sequence of hidden-states at the image output of the last layer of the model.
    pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size x 2)`):
        Concatenation of last layer hidden-state of the first token of the text and image sequence (classification
        token), respectively, after further processing through layers used for auxiliary pretraining tasks.
    Ntext_featuresimage_featurespooler_outputhidden_states
attentions)__name__
__module____qualname____doc__r%   r   torchFloatTensor__annotations__r&   r'   r(   tupler)        r/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/models/bridgetower/modeling_bridgetower.pyr$   r$   1   s|     26M8E--.526NHU../615M8E--.58<M8E%"3"345<59Ju00129r3   r$   z>
    Output type of ['BridgeTowerForContrastiveLearning']
    c                   H   e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                        ed<   dZeeej                        ed<   dZeeej                        ed<   dZeeej                        ed<   dZeeej                        ed	<   y)
BridgeTowerContrastiveOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `return_loss` is `True`):
        Image-text contrastive loss.
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    text_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`):
        The text embeddings obtained by applying the projection layer to the pooler_output.
    image_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`):
        The image embeddings obtained by applying the projection layer to the pooler_output.
    cross_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`):
        The text-image cross-modal embeddings obtained by applying the projection layer to the pooler_output.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.
    Nlosslogitstext_embedsimage_embedscross_embedsr(   r)   )r*   r+   r,   r-   r7   r   r.   r/   r0   r8   r9   r1   r:   r;   r(   r)   r2   r3   r4   r6   r6   I   s      )-D(5$$
%,*.FHU&&'.6:K% 1 123:7;L(5!2!234;7;L(5!2!234;8<M8E%"3"345<59Ju00129r3   r6   c                        e Zd Z fdZdej
                  dej
                  fdZddej
                  deej
                     fdZ xZ	S )BridgeTowerResidualAttentionc                 h   t         |           t        j                  |j                  |j                  dz        | _        t        j                  |j                  |j                        | _        t        j                  t        dt        j                  |j                  |j                  dz        fdt               fdt        j                  |j                  dz  |j                        fg            | _        t        j                  |j                  |j                        | _        d | _        y )N@   epsc_fc   geluc_proj)super__init__r   MultiheadAttentionhidden_sizeattn	LayerNormlayer_norm_epsln_1
ModuleDictr   Linearr   mlpln_2	attn_maskselfconfig	__class__s     r4   rG   z%BridgeTowerResidualAttention.__init__j   s    ))&*<*<f>P>PTV>VW	LL!3!39N9NO	==RYYv'9'96;M;MPQ;QRS023ryy););a)?ASASTU
 LL!3!39N9NO	r3   hidden_stateattention_maskc                 ,   |+|j                  t        j                  |j                        }| j                  1| j                  j                  |j
                  |j                        nd | _        | j                  |||d| j                  |      d   S )NdtypedeviceF)need_weightsrR   key_padding_maskr   )tor.   boolr\   rR   r[   rJ   )rT   rW   rX   s      r4   	attentionz&BridgeTowerResidualAttention.attention{   s    %+..UZZH[H[.\N ~~) NNL$6$6|?R?RS 	
 yynn+  
  	r3   c                     || j                  | j                  |      |      z   }| j                  |      }| j                  j	                         D ]
  } ||      } ||z   }|S N)ra   rM   rQ   rP   values)rT   rW   rX   residual_statelayers        r4   forwardz$BridgeTowerResidualAttention.forward   sc    %tyy7NP^(__yy0XX__& 	/E .L	/%4r3   rc   )
r*   r+   r,   rG   r.   Tensorra   r   rg   __classcell__rV   s   @r4   r=   r=   i   sC    "ell ELL "ELL (5<<BX r3   r=   c                   ^     e Zd Z fdZddej
                  deej
                     fdZ xZS )BridgeTowerTransformerc                    t         |           |j                  | _        |j                  | _        |j                  rHt        j                  t        | j                  dz
        D cg c]  }t        |       c}      | _	        nDt        j                  t        | j                        D cg c]  }t        |       c}      | _	        |j                  | _
        y c c}w c c}w )Nr   )rF   rG   rI   num_hidden_layersremove_last_layerr   
ModuleListranger=   	resblocksstop_gradientrT   rU   _rV   s      r4   rG   zBridgeTowerTransformer.__init__   s    !--!'!9!9##]]?DTE[E[^_E_?`a!-f5aDN  ]]?DTE[E[?\]!-f5]DN $11 b ^s   'C,C!rW   rX   c                     g }| j                   D ]H  } |||      }| j                  r |j                  |j                                8|j                  |       J |S rc   )rr   rs   appenddetach)rT   rW   rX   r(   blocks        r4   rg   zBridgeTowerTransformer.forward   s\    ^^ 	3E ~>L!!$$\%8%8%:;$$\2	3 r3   rc   )	r*   r+   r,   rG   r.   rh   r   rg   ri   rj   s   @r4   rl   rl      s(    2ELL (5<<BX r3   rl   c                        e Zd Zdef fdZdej                  dededej                  fdZd
dej                  dej                  fd	Z
 xZS )BridgeTowerVisionEmbeddingsrU   c                    t         |           || _        |j                  | _        |j
                  | _        |j                  | _        t        j                  t        j                  | j                              | _        t        j                  |j                  | j                  | j                  | j                  d      | _        | j
                  | j                  z  dz  | _        | j                  dz   | _        t        j"                  | j                   | j                        | _        | j'                  dt        j(                  | j                         j+                  d      d       y )NF)in_channelsout_channelskernel_sizestridebias   r   position_idsr   
persistent)rF   rG   rU   rI   	embed_dim
image_size
patch_sizer   	Parameterr.   randnclass_embeddingConv2dnum_channelspatch_embeddingnum_patchesnum_positions	Embeddingposition_embeddingregister_bufferarangeexpandrS   s     r4   rG   z$BridgeTowerVisionEmbeddings.__init__   s	   ++ ++ ++!||EKK,GH!yy++?? 
 !OOt>1D!--1"$,,t/A/A4>>"R^U\\$:L:L-M-T-TU\-]jopr3   
embeddingsheightwidthreturnc                    |j                   d   dz
  }| j                  j                  j                  d      }|j                   d   dz
  }t        j
                  j                         s%||k(  r ||k(  r| j                  | j                        S |ddddf   }|ddddf   }|j                   d   }	|| j                  z  }
|| j                  z  }t        |dz        }|j                  d|||	      }|j                  dddd      }t        j                  j                  ||
|fdd	
      }|j                  dddd      j                  dd|	      }t	        j                   ||fd      S )a   
        This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
        images. This method is also adapted to support torch.jit tracing.

        Adapted from:
        - https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
        - https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
        r   r   Nr         ?r	   r   bicubicF)sizemodealign_cornersdim)shaper   weight	unsqueezer.   jit
is_tracingr   r   r   reshapepermuter   
functionalinterpolateviewcat)rT   r   r   r   r   r   r   class_pos_embedpatch_pos_embedr   
new_height	new_widthsqrt_num_positionss                r4   interpolate_pos_encodingz4BridgeTowerVisionEmbeddings.interpolate_pos_encoding   sv    !&&q)A-!44;;EEaH*003a7 yy##%+*F6UZ?**4+<+<==,QU3,QU3r"t.
T__,	&}c'9:)11!5GI[]`a)11!Q1=--33i(	 4 
 *11!Q1=BB1b#Nyy/?;CCr3   pixel_valuesc                 `   |j                   \  }}}}|sJ|| j                  k7  s|| j                  k7  r,t        d| d| d| j                   d| j                   d	      | j                  j                  j
                  }| j                  |j                  |            }|j                  d      j                  dd      }| j                  j                  |dd      }	t        j                  |	|gd	      }
|r|
| j                  |
||      z   }
|
S |
| j                  | j                        z   }
|
S )
NzInput image size (*z) doesn't match model (z).r[   r   r   r   r   )r   r   
ValueErrorr   r   r[   r_   flatten	transposer   r   r.   r   r   r   r   )rT   r   r   
batch_sizeru   r   r   target_dtypepatch_embedsclass_embedsr   s              r4   rg   z#BridgeTowerVisionEmbeddings.forward   s6   '3'9'9$
Avu'Vt-F%SWSbSbJb$VHAeW4KDOOK\\]^b^m^m]nnpq  ++2288++LOO,O,OP#++A.88A>++22:q"EYYl;C
##d&C&CJPVX]&^^J  $d&=&=d>O>O&PPJr3   F)r*   r+   r,   r    rG   r.   rh   intr   r/   rg   ri   rj   s   @r4   r{   r{      se    q6 q,'D5<< 'D 'DUX 'D]b]i]i 'DRE$5$5 Z_ZfZf r3   r{   c                        e Zd Z fdZ	 ddej
                  defdZ	 ddej
                  defdZdej
                  fdZ	 xZ
S )	BridgeTowerVisionTransformerc           	      0   t         |           t        |      | _        t	        j
                  |j                  |j                        | _        t        |      | _
        t	        j
                  |j                  |j                        | _        |j                  | _        |j                  set	        j                  t        |j                        D cg c]-  }t	        j
                  |j                  |j                        / c}      | _        y y c c}w Nr@   )rF   rG   r{   r   r   rK   rI   rL   ln_prerl   transformerln_postshare_layernormrp   rq   rn   ln_separatert   s      r4   rG   z%BridgeTowerVisionTransformer.__init__  s    5f=ll6#5#56;P;PQ1&9||F$6$6F<Q<QR%55%%!}}V[\b\t\tVuvQRf00f6K6KLv D &vs   2Dr   r   c                    | j                  ||      }| j                  |      }|j                  ddd      }| j                  ||      }t	        j
                  |d      }|j                  dddd      }| j                  r| j                  |      }|S g }t        || j                        D ]  \  }} ||      }|j                  |         t	        j
                  |d      }|S )Nr   r   r   r   r	   )r   r   r   r   r.   stackr   r   zipr   rw   )rT   r   rX   r   r(   hidden_states_stacklns          r4   rg   z$BridgeTowerVisionTransformer.forward  s     6NOM2%--aA6((GMq9%--aAq9 LL7M  #%%(8H8H%I :!r "= 1#**=9: "KK(;CMr3   c                 t    | j                  ||      }| j                  |      }|j                  ddd      }|S )Nr   r   r   r   )r   r   r   )rT   r   r   r(   s       r4   forward_prez(BridgeTowerVisionTransformer.forward_pre,  s?    
 OghM2%--aA6r3   rW   c                 N    |j                  ddd      }| j                  |      }|S )Nr   r   r   )r   r   )rT   rW   visual_output_posts      r4   forward_postz)BridgeTowerVisionTransformer.forward_post7  s-    )11!Q:!\\*<=!!r3   r   )r*   r+   r,   rG   r.   rh   r`   rg   r   r   ri   rj   s   @r4   r   r     sX    " */	ll #'	< */	ll	 #'	" "r3   r   c                   $     e Zd Z fdZd Z xZS )BridgeTowerLinkTowerc                    t         |           |j                  | _        |j                  | _        |j                  dv r|j                  dk(  r.t	        j
                  t        j                  d            | _        n<|j                  dk(  r-t	        j
                  t        j                  d            | _	        t	        j                  | j                  |j                        | _
        y t        d|j                   d      )	N)add
scaled_addr   r         ?r   r   r@   link_tower_type  is not implemented)rF   rG   link_tower_typerI   r   r   r.   tensorscaled_factorbetarK   rL   NotImplementedErrorrS   s     r4   rG   zBridgeTowerLinkTower.__init__>  s    %55!--!!%II%%5%'\\%,,s2C%D"''=8LLc):;	\\$*:*:@U@UVDN%(89O9O8PPc&deer3   c                 Z   | j                   dk(  r| j                  ||z         S | j                   dk(  r!| j                  || j                  z  |z         S | j                   dk(  r1| j                  |d| j                  z
  z  || j                  z  z         S t	        d| j                    d      )Nr   r   r   r   r   r   )r   rK   r   r   r   )rT   r(   cross_modal_hidden_statesrX   s       r4   rg   zBridgeTowerLinkTower.forwardK  s    5(>>-2K"KLL!!\1>>-$2D2D"DG`"`aa!!]2>>-1tyy="AD]`d`i`iDi"ijj%(89M9M8NNa&bccr3   r*   r+   r,   rG   rg   ri   rj   s   @r4   r   r   =  s    fdr3   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )BridgeTowerSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y r   )rF   rG   r   rO   rI   denserK   rL   Dropouthidden_dropout_probdropoutrS   s     r4   rG   zBridgeTowerSelfOutput.__init__X  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r3   r(   input_tensorr   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rc   r   r   rK   rT   r(   r   s      r4   rg   zBridgeTowerSelfOutput.forward^  7    

=1]3}|'CDr3   r*   r+   r,   rG   r.   rh   rg   ri   rj   s   @r4   r   r   W  1    >U\\  RWR^R^ r3   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BridgeTowerIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y rc   )rF   rG   r   rO   rI   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnrS   s     r4   rG   z BridgeTowerIntermediate.__init__g  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r3   r(   r   c                 J    | j                  |      }| j                  |      }|S rc   )r   r   rT   r(   s     r4   rg   zBridgeTowerIntermediate.forwardo  s&    

=100?r3   r   rj   s   @r4   r   r   f  s#    9U\\ ell r3   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )BridgeTowerOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )rF   rG   r   rO   r   rI   r   rK   rL   r   r   r   rS   s     r4   rG   zBridgeTowerOutput.__init__w  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r3   r(   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rc   r   r   s      r4   rg   zBridgeTowerOutput.forward}  r   r3   r   rj   s   @r4   r   r   v  r   r3   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BridgeTowerPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y rc   )rF   rG   r   rO   rI   r   Tanh
activationrS   s     r4   rG   zBridgeTowerPooler.__init__  s9    YYv1163E3EF
'')r3   r(   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r  )rT   r(   first_token_tensorpooled_outputs       r4   rg   zBridgeTowerPooler.forward  s6     +1a40

#566r3   r   rj   s   @r4   r  r    s#    $
U\\ ell r3   r  c                       e Zd Zd fd	Z eddd      	 	 	 	 	 	 ddej                  deej                     deej                     d	eej                     dee	   d
ee
   deej                     deej                     fd       Z xZS )BridgeTowerSelfAttentionc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                        | _        |xs t#        |dd      | _        | j$                  dk(  s| j$                  d	k(  rF|j&                  | _        t        j(                  d
|j&                  z  dz
  | j                        | _        |j,                  | _        || _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterelative_keyrelative_key_queryr   r   )rF   rG   rI   num_attention_headshasattrr   r   attention_head_sizeall_head_sizer   rO   querykeyvaluer   attention_probs_dropout_probr   getattrr  max_position_embeddingsr   distance_embedding
is_decoder	layer_idxrT   rU   r  r  rV   s       r4   rG   z!BridgeTowerSelfAttention.__init__  s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'> (
'-zC
$ ''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++"r3   past_key_valuepast_key_values4.58new_nameversionr(   rX   	head_maskencoder_hidden_statesoutput_attentionscache_positionr   c                 	   |j                   \  }}	}
| j                  |      }|j                  |d| j                  | j                        j                  dd      }|d u}|St        |t              rA|j                  j                  | j                        }|r|j                  }n|j                  }n|}|r|n|}|rK|IrGj                  | j                     j                  }|j                  | j                     j                  }n| j!                  |      }|j                  |d| j                  | j                        j                  dd      }| j#                  |      }|j                  |d| j                  | j                        j                  dd      }|D|s|nd }j%                  ||| j                  d|i      \  }}|rd|j                  | j                  <   t'        j(                  ||j                  dd            }| j*                  dk(  s| j*                  dk(  r|j                   d   |j                   d   }}|Dt'        j,                  |dz
  t&        j.                  |j0                  	      j                  dd      }n@t'        j2                  |t&        j.                  |j0                  	      j                  dd      }t'        j2                  |t&        j.                  |j0                  	      j                  dd      }||z
  }| j5                  || j6                  z   dz
        }|j9                  |j:                  
      }| j*                  dk(  rt'        j<                  d||      }||z   }nE| j*                  dk(  r6t'        j<                  d||      }t'        j<                  d||      }||z   |z   }|t?        j@                  | j                        z  }|||z   }tB        jD                  jG                  |d      }| jI                  |      }|||z  }t'        j(                  ||      }|jK                  dddd      jM                         }|jO                         d d | jP                  fz   }|j                  |      }||fS )Nr   r   r   r*  Tr  r  rZ   r   zbhld,lrd->bhlrzbhrd,lrd->bhlrr   r   r	   ))r   r  r   r  r  r   r   r   
is_updatedgetr  cross_attention_cacheself_attention_cachelayerskeysrd   r  r  updater.   matmulr  r   longr\   r   r  r  r_   r[   einsummathsqrtr   r   softmaxr   r   
contiguousr   r  )rT   r(   rX   r'  r(  r"  r)  r*  r   
seq_lengthru   query_layeris_cross_attentionr-  curr_past_key_valuecurrent_states	key_layervalue_layerattention_scoresquery_length
key_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapes                                  r4   rg   z BridgeTowerSelfAttention.forward  sN    %2$7$7!
Jjj/!&&z2t7O7OQUQiQijttq
 3$>&/+>?,77;;DNNK
%*9*O*O'*9*N*N'&5#2D.-/"=*+224>>BGGI-44T^^DKKK0I!z2t7O7OQUQiQijtt1I **^4K%**B 8 8$:R:Ri1o  *7It)<)C)C{DNN=M~<^*&	; &AEO..t~~> !<<Y5H5HR5PQ''>9T=Y=Y]q=q'2'8'8';Y__Q=O*L*!&j1nEJJWdWkWk!l!q!q" "'l%**UbUiUi!j!o!oprtu!v"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s +dii8P8P.QQ%/.@ --//0@b/I ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BCo--r3   NNNNNNFN)r*   r+   r,   rG   r   r.   rh   r   r/   r   r`   r1   rg   ri   rj   s   @r4   r  r    s    #6 %0A6R 7;15=A+/,115d.||d. !!2!23d. E--.	d.
  ((9(9:d. "%d. $D>d. !.d. 
u||	d. Sd.r3   r  eagerc                       e Zd Zd fd	Zd Z eddd      	 	 	 	 	 	 ddej                  deej                     d	eej                     d
eej                     dee
   dee   deej                     deej                     fd       Z xZS )BridgeTowerAttentionc                     t         |           t        |j                     |||      | _        t        |      | _        t               | _        y )Nr  r  )	rF   rG   #BRIDGE_TOWER_SELF_ATTENTION_CLASSES_attn_implementationrT   r   outputsetpruned_headsr   s       r4   rG   zBridgeTowerAttention.__init__   sF    78S8ST$;
	
 ,F3Er3   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r   )lenr   rT   r  r  rZ  r   r  r  r  rX  r   r  union)rT   headsindexs      r4   prune_headsz BridgeTowerAttention.prune_heads*  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r3   r!  r"  r#  r$  r(   rX   r'  r(  r)  r*  r   c           	      r    | j                  |||||||      }| j                  |d   |      }	|	f|dd  z   }
|
S )NrX   r'  r(  r"  r)  r*  r   r   )rT   rX  )rT   r(   rX   r'  r(  r"  r)  r*  self_outputsattention_outputoutputss              r4   rg   zBridgeTowerAttention.forward<  s\     yy)"7+/) ! 
  ;;|AF#%QR(88r3   rO  rP  )r*   r+   r,   rG   r`  r   r.   rh   r   r/   r   r`   r1   rg   ri   rj   s   @r4   rS  rS    s    ";$ %0A6R 7;15=A+/,115|| !!2!23 E--.	
  ((9(9: "% $D> !. 
u||	 Sr3   rS  c                   V     e Zd Zd fd	Z eddd      	 	 	 	 	 	 d	d       Zd Z xZS )
BridgeTowerBertCrossLayerc                    t         |           |j                  | _        d| _        t	        ||      | _        |j                  | _        |j                  | _        t	        ||      | _        t        |      | _
        t        |      | _        y )Nr   r  )rF   rG   chunk_size_feed_forwardseq_len_dimrS  ra   r  add_cross_attentioncrossattentionr   intermediater   rX  rT   rU   r  rV   s      r4   rG   z"BridgeTowerBertCrossLayer.__init__V  su    '-'E'E$-f	J ++#)#=#= 26YO3F;'/r3   r!  r"  r#  r$  c	           	          | j                  ||d |d       }	|	d   }
|	dd  }| j                  |
||||||      }|d   }
||dd  z   }t        | j                  | j                  | j
                  |
      }|f|z   }|S )N)rX   r'  r)  r"  r   r   rb  )ra   rm  r   feed_forward_chunkrj  rk  )rT   r(   r(  rX   r'  encoder_attention_maskr"  r)  r*  self_attention_outputsrd  re  cross_attention_outputslayer_outputs                 r4   rg   z!BridgeTowerBertCrossLayer.forwarda  s     "&)/  "0 "
 2!4 ),"&"5"51"7+/) #6 #
 3153AB770##T%A%A4CSCSUe
  /G+r3   c                 L    | j                  |      }| j                  ||      }|S rc   rn  rX  rT   rd  intermediate_outputru  s       r4   rq  z,BridgeTowerBertCrossLayer.feed_forward_chunk  ,    "//0@A{{#68HIr3   rc   rP  )r*   r+   r,   rG   r   rg   rq  ri   rj   s   @r4   rg  rg  U  sB    	0 %0A6R
 #+ S+Zr3   rg  c                   :    e Zd Zd fd	Z eddd      	 	 	 	 	 	 	 ddej                  deej                     deej                     d	eej                     d
eej                     dee	   dee
   deej                     deej                     fd       Zd Z xZS )BridgeTowerTextLayerc                 l   t         |           |j                  | _        d| _        t	        ||      | _        |j                  | _        |j                  | _        | j                  r-| j                  st        |  d      t	        |d|      | _	        t        |      | _        t        |      | _        y )Nr   ri  z> should be used as a decoder model if cross attention is addedr  rU  )rF   rG   rj  rk  rS  ra   r  rl  r   rm  r   rn  r   rX  ro  s      r4   rG   zBridgeTowerTextLayer.__init__  s    '-'E'E$-f	J ++#)#=#= ##?? D6)g!hii"6vWamv"wD3F;'/r3   r!  r"  r#  r$  r(   rX   r'  r(  rr  r)  r*  r   c	           	      h   | j                  ||||||      }	|	d   }
| j                  r|	dd }n|	dd  }| j                  rB|@t        | d      st        d|  d      | j	                  |
||||||      }|d   }
||dd z   }t        | j                  | j                  | j                  |
      }|f|z   S )	N)rX   r'  r)  r"  r*  r   r   r   rm  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`rb  )	ra   r  r  r   rm  r   rq  rj  rk  )rT   r(   rX   r'  r(  rr  r"  r)  r*  rs  rd  re  rt  ru  s                 r4   rg   zBridgeTowerTextLayer.forward  s    "&)/+) "0 "
 2!4 ??,Qr2G,QR0G??4@4!12 =dV DD D 
 '+&9&9 5#&; /"3- ': '#  7q9 7" ==G0##T%A%A4CSCSUe
 ((r3   c                 L    | j                  |      }| j                  ||      }|S rc   rw  rx  s       r4   rq  z'BridgeTowerTextLayer.feed_forward_chunk  rz  r3   rc   )NNNNNFN)r*   r+   r,   rG   r   r.   rh   r   r/   r   r`   r1   rg   rq  ri   rj   s   @r4   r|  r|    s    0 %0A6R 7;15=A>B+/,1152)||2) !!2!232) E--.	2)
  ((9(9:2) !)):): ;2) "%2) $D>2) !.2) 
u||	2) S2)hr3   r|  c                   f    e Zd Zd fd	Z	 	 	 	 	 	 	 	 	 	 ddej
                  deej                     deej                     deej                     deej                     deeeej                           dee	   d	ee	   d
ee	   dee	   deej
                     de
eej
                     ef   fdZ xZS )BridgeTowerTextEncoderc           	          t         |           || _        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        d| _	        y c c}w )Nri  F)
rF   rG   rU   r   rp   rq   rn   r|  rf   gradient_checkpointing)rT   rU   r  irV   s       r4   rG   zBridgeTowerTextEncoder.__init__  sV    ]]@EfF^F^@_`1!&A6`

 ',# as   A%r(   rX   r'  r(  rr  r"  	use_cacher)  output_hidden_statesreturn_dictr*  r   c                 4   |	rdnd }|rdnd }|r| j                   j                  rdnd }| j                  r%| j                  r|rt        j                  d       d}|rL| j                   j                  r6|4t        t        | j                         t        | j                               }|rP| j                   j                  r:t        |t              r*t        j                  d       t        j                  |      }t        | j                        D ]W  \  }}|	r||fz   }|||   nd } |||||||||      }|d   }|s/||d   fz   }| j                   j                  sO||d	   fz   }Y |	r||fz   }|
st        d
 |||||fD              S t        |||||      S )Nr2   zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...F)rU   zPassing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.58.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.)rr  r"  r)  r*  r   r   r   c              3   $   K   | ]  }|| 
 y wrc   r2   .0vs     r4   	<genexpr>z1BridgeTowerTextEncoder.forward.<locals>.<genexpr>(  s      
 = 
   )last_hidden_stater"  r(   r)   cross_attentions)rU   rl  r  trainingloggerwarning_oncer  r   r   r   r1   from_legacy_cache	enumeraterf   r   )rT   r(   rX   r'  r(  rr  r"  r  r)  r  r  r*  all_hidden_statesall_self_attentionsall_cross_attentionsr  layer_modulelayer_head_masklayer_outputss                      r4   rg   zBridgeTowerTextEncoder.forward  s    #7BD$5b4%64;;;Z;Zr`d&&4==##p "	//O4K1,dkk2RT`hlhshsTtuO//JPU4V\
 2CCOTO(4 	VOA|#$58H$H!.7.CilO(%'= /"3-	M *!,M &9]1=M<O&O#;;22+?=QRCSBU+U(+	V.   1]4D D 
 "#%'(
 
 
 9+++*1
 	
r3   rc   )
NNNNNNFFTN)r*   r+   r,   rG   r.   rh   r   r/   r1   r`   r   r   rg   ri   rj   s   @r4   r  r    s"   , 7;15=A>BEI$(,1/4&*15P
||P
 !!2!23P
 E--.	P

  ((9(9:P
 !)):): ;P
 "%e.?.?(@"ABP
 D>P
 $D>P
 'tnP
 d^P
 !.P
 
uU\\"$MM	NP
r3   r  c                   2     e Zd ZdZ fdZ	 ddZd Z xZS )BridgeTowerTextEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    c                    t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j'                  dt)        j*                  |j                        j-                  d      d       | j'                  d	t)        j.                  | j0                  j3                         t(        j4                  
      d       |j                  | _        t        j                  |j                  |j
                  | j6                        | _	        y )N)padding_idxr@   r  r  r   r   Fr   token_type_idsr   )rF   rG   r   r   
vocab_sizerI   pad_token_idword_embeddingsr  position_embeddingstype_vocab_sizetoken_type_embeddingsrK   rL   r   r   r   r  r  r   r.   r   r   zerosr   r   r5  r  rS   s     r4   rG   z"BridgeTowerTextEmbeddings.__init__C  si   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<='.v7PR\']$ELL)G)GHOOPWXej 	 	
 	ekk$*;*;*@*@*B%**Ubg 	 	

 "..#%<<**F,>,>DL\L\$
 r3   c                    |+|t        || j                  |      }n| j                  |      }||j                         }n|j                         d d }|d   }|st	        | d      r-| j
                  d d d |f   }|j                  |d   |      }	|	}n:t        j                  |t        j                  | j                  j                        }|| j                  |      }| j                  |      }
||
z   }| j                  dk(  r| j                  |      }||z  }| j!                  |      }| j#                  |      }|S )Nr   r   r  r   rZ   r  )"create_position_ids_from_input_idsr  &create_position_ids_from_inputs_embedsr   r  r  r   r.   r  r5  r   r\   r  r  r  r  rK   r   )rT   	input_idsr  r   inputs_embedspast_key_values_lengthinput_shaper;  buffered_token_type_ids buffered_token_type_ids_expandedr  r   r  s                r4   rg   z!BridgeTowerTextEmbeddings.forward\  sR    $A)TM]M]_uv#JJ=Y #..*K',,.s3K ^

 !t-.*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M $ : :> J"%::
'':5"&":":<"H--J^^J/
\\*-
r3   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr   r   rZ   r   )r   r.   r   r  r5  r\   r   r   )rT   r  r  sequence_lengthr   s        r4   r  z@BridgeTowerTextEmbeddings.create_position_ids_from_inputs_embeds  s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r3   )NNNNr   )r*   r+   r,   r-   rG   rg   r  ri   rj   s   @r4   r  r  =  s    

4 rs&P=r3   r  c                     | j                  |      j                         }t        j                  |d      j	                  |      |z   |z  }|j                         |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r   r   )ner   r.   cumsumtype_asr5  )r  r  r  maskincremental_indicess        r4   r  r    sW     <<$((*D <<!4<<TBE[[_cc##%33r3   c                   L    e Zd ZU eed<   dZdZddgZdZde	j                  fdZy	)
BridgeTowerPreTrainedModelrU   bridgetowerFr  r=   r"  modulec                 x   | j                   j                  }t        |t              r0| j                   j                  dz  d| j                   j
                  z  dz  z  }| j                   j                  dz  }d| j                   j                  z  dz  }|j                  j                  D ]/  }t        j                  j                  |j                  j                  ||z         |j                  j                  j                  j                          t        j                  j                  |j                  j                   j"                  ||z         t        j                  j                  |j$                  j&                  j"                  ||z         t        j                  j                  |j$                  j(                  j"                  ||z         2 t        j                  j                  |j*                  j,                  ||z         t        j                  j                  |j*                  j.                  j"                  ||z         nt        |t        j0                  t        j2                  t        j4                  f      r+|j"                  j                  j                  dd|z         nt        |t        j6                        rJ|j8                  j                  j                          |j"                  j                  j;                  d       nIt        |t<              r9|j>                  j                  j;                  | j                   j@                         t        |t        j0                  tB        f      r2|j8                  %|j8                  j                  j                          y y y )Ng      r   )stdg        g?)meanr  r   )"rU   initializer_factorr   r   rI   rn   r   rr   r   initnormal_rJ   in_proj_weightin_proj_biasdatazero_out_projr   rP   rB   rE   r   r   r   rO   r   r   rK   r   fill_!BridgeTowerForContrastiveLearninglogit_scalelogit_scale_init_valueBridgeTowerMLMHead)rT   r  r  proj_stdattn_stdfc_stdry   s          r4   _init_weightsz(BridgeTowerPreTrainedModel._init_weights  s   kk,,f:;//51t{{?\?\;\ae:efH{{..4H$++111d:F++55 M

 9 9x#~N

'',,224

 3 3 : :3O		 5 56C<H		 0 0 7 7X^LM GGOOF--==8c>ORGGOOF--@@GGXX[^O\BIIr|| DEMM&&CTCZ&@-KK""$MM$$S) AB##))$++*L*LMfryy*<=>6;;CZKK""$ D[>r3   N)r*   r+   r,   r   r0   base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placementr   Moduler  r2   r3   r4   r  r    s6    %&+#35ST"3%BII %r3   r  c                   B     e Zd ZU eed<    fdZed        ZddZ xZ	S )BridgeTowerVisionModelrU   c                 D    t         |   |       t        |      | _        y rc   )rF   rG   r   visualrS   s     r4   rG   zBridgeTowerVisionModel.__init__  s     26:r3   c                 j    | j                   j                  j                  j                  j                  S rc   )r  r   r   r   r[   rT   s    r4   r[   zBridgeTowerVisionModel.dtype  s$    {{%%55<<BBBr3   c                 Z    | j                  |j                  | j                        ||      S rc   )r  typer[   )rT   image
image_maskr   s       r4   rg   zBridgeTowerVisionModel.forward  s#    {{5::djj1:?WXXr3   )NF)
r*   r+   r,   r    r0   rG   propertyr[   rg   ri   rj   s   @r4   r  r    s*    ##; C CYr3   r  a0  
    The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
    cross-attention is added between the self-attention layers, following the architecture described in *Attention is
    all you need*_ by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz
    Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the `is_decoder` argument of the configuration set
    to `True`. To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder` argument and
    `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.

    .. _*Attention is all you need*: https://huggingface.co/papers/1706.03762
    c            "           e Zd ZU eed<   d fd	Zd Zd Zd Ze		 	 	 	 	 	 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     de
ej                     d	e
ej                     d
e
ej                     de
ej                     de
ej                     de
ej                     de
eej                        de
e   de
e   de
e   de
e   de
ej                     deeej                     ef   fd       Z xZS )BridgeTowerTextModelrU   c                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd| _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
rF   rG   rU   r  r   r  encoderr  pooler	post_init)rT   rU   add_pooling_layerrV   s      r4   rG   zBridgeTowerTextModel.__init__  sN    
 	 3F;-f53D'/$ 	r3   c                 .    | j                   j                  S rc   r   r  r  s    r4   get_input_embeddingsz)BridgeTowerTextModel.get_input_embeddings  s    ...r3   c                 &    || j                   _        y rc   r  rT   r  s     r4   set_input_embeddingsz)BridgeTowerTextModel.set_input_embeddings  s    */'r3   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  rf   ra   r`  )rT   heads_to_prunerf   r^  s       r4   _prune_headsz!BridgeTowerTextModel._prune_heads   sE    
 +002 	CLE5LLu%//;;EB	Cr3   r  rX   r  r   r'  r  r(  rr  r"  r  r)  r  r  r*  r   c                    ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j                   j                  r|
|
n| j                   j
                  }
nd}
||t        d      |#| j                  ||       |j                         }n!||j                         d d }nt        d      |\  }}||j                  n|j                  }d}|	5t        |	t              s|	d   d   j                  d   n|	j                         }|t        j                  |||z   f|      }|pt!        | j"                  d      r4| j"                  j$                  d d d |f   }|j'                  ||      }|}n&t        j(                  |t        j*                  |	      }| j-                  ||      }| j                   j                  rE|C|j                         \  }}}||f}|t        j                  ||      }| j/                  |      }nd }| j1                  || j                   j2                        }| j#                  |||||
      }| j5                  ||||||	|
||||      }|d   }| j6                  | j7                  |      nd }|s
||f|dd  z   S t9        |||j:                  |j<                  |j>                  |j@                        S )NFzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr   r,  r\   r  rZ   )r  r   r  r  r  )
rX   r'  r(  rr  r"  r  r)  r  r  r*  r   )r  r'   r"  r(   r)   r  )!rU   r)  r  use_return_dictr  r  r   %warn_if_padding_and_no_attention_maskr   r\   r   r   r   get_seq_lengthr.   onesr  r   r  r   r  r5  get_extended_attention_maskinvert_attention_maskget_head_maskrn   r  r  r   r"  r(   r)   r  ) rT   r  rX   r  r   r'  r  r(  rr  r"  r  r)  r  r  r*  r  r   r;  r\   r  r  r  extended_attention_maskencoder_batch_sizeencoder_sequence_lengthru   encoder_hidden_shapeencoder_extended_attention_maskembedding_outputencoder_outputssequence_outputr	  s                                    r4   rg   zBridgeTowerTextModel.forward  s4   $ 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B];;!!%.%:	@U@UII ]%>cdd"66y.Q#..*K&',,.s3KTUU!,
J%.%:!!@T@T!"& "/59  "1%++B/$335 # !"ZZ*jCY6Y)ZdjkN!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_al0m ;;!!&;&G=R=W=W=Y: 7$68O#P %-).4HQW)X&.2.H.HI_.`+.2+ &&y$++2O2OP	??%)'#9 + 
 ,,2"7#B+/!5#) ' 
 *!,8<8OO4UY#]3oab6III;-'+;;)77&11,==
 	
r3   )T)NNNNNNNNNNNNNN)r*   r+   r,   r   r0   rG   r  r  r  r   r   r.   rh   listr/   r`   r   r1   r   rg   ri   rj   s   @r4   r  r    s    "! /0C  -11515/3,0048<9==A$(,0/3&*15s
ELL)s
 !.s
 !.	s

 u||,s
 ELL)s
  -s
  (5s
 !) 6s
 "$u'8'8"9:s
 D>s
 $D>s
 'tns
 d^s
 !.s
  
uU\\"$PP	Q!s
 s
r3   r  zv
    The bare BridgeTower Model transformer outputting BridgeTowerModelOutput object without any specific head on
    c            "           e Zd Z fdZd Zd Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     dee   dee   dee   dee   deej                     dedeeej                     ef   fd       Zd Z xZS )BridgeTowerModelc           	         t         |   |       || _        |j                  }|j                  }|j
                  r_t        j                  |j                  |j                        | _	        t        j                  |j                  |j                        | _
        nt        j                  t        |j                        D cg c],  }t        j                  |j                  |j                        . c}      | _	        t        j                  t        |j                        D cg c],  }t        j                  |j                  |j                        . c}      | _
        t        j                  d|j                        | _        t!        |      | _        t%        |      | _        |j(                  s|j*                  r| j"                  j,                  j.                  D ]  }| j"                  j,                  j0                  j2                  j4                  |j2                  _        | j"                  j,                  j0                  j6                  j4                  |j6                  _         t        j                  t        |j                        D cg c]  }t9        ||       c}      | _        t        j                  t        |j                        D cg c]  }t9        ||       c}      | _        t?        |      | _         t?        |      | _!        t        jD                  |j                  |jF                        | _$        t        jD                  |j                  |jF                        | _%        |jL                  r!tO        |      | _(        tO        |      | _)        nt        j                  t        |j                  dz
        D cg c]  }tO        |       c}      | _(        t        j                  t        |j                  dz
        D cg c]  }tO        |       c}      | _)        | jU                          y c c}w c c}w c c}w c c}w c c}w c c}w )Nr   ri  r@   r   )+rF   rG   rU   vision_configtext_config$share_cross_modal_transformer_layersr   rO   rI   cross_modal_text_transformcross_modal_image_transformrp   rq   rn   r   r  r  vision_modelr  
text_modelr   "init_layernorm_from_vision_encoderr  cross_modal_ln_separater   r   r  r   rg  cross_modal_image_layerscross_modal_text_layersr  cross_modal_image_poolercross_modal_text_poolerrK   rL   cross_modal_text_layernormcross_modal_image_layernormshare_link_tower_layersr   cross_modal_text_link_towercross_modal_image_link_towerr  )rT   rU   r  r  ru   r   r  rV   s          r4   rG   zBridgeTowerModel.__init__  s?    ,,((66.0ii8O8OQWQcQc.dD+/1yy9R9RTZTfTf/gD,.0mmQVW]WoWoQpqA;22F4F4FGq/D+ 02}}SXY_YqYqSrsa=44f6H6HIs0D, &(\\!V5G5G%H"2=A.{;,,1Z1Z''..FF J!%!2!2!9!9!A!A!H!H!M!M		#0077??DDIIJ )+JOPVPhPhJijQ&{a@j)
% (*}}JOPVPhPhJijQ&{a@j(
$
 ):&(A%'8'@$ +-,,v7I7IvOdOd*e'+-<<8J8JPVPePe+f())/CF/KD,0DV0LD-/1}}7<V=U=UXY=Y7Z[!%f-[0D, 137<V=U=UXY=Y7Z[!%f-[1D- 	W r t k k  \ \s$   1Q$1QQQQQc                 6    | j                   j                         S rc   )r  r  r  s    r4   r  z%BridgeTowerModel.get_input_embeddings  s    3355r3   c                 :    | j                   j                  |       y rc   )r  r  r  s     r4   r  z%BridgeTowerModel.set_input_embeddings  s    ,,U3r3   r  rX   r  r   
pixel_maskr'  r  r:   image_token_type_idxr)  r  r  labelsr   r   c                    |
|
n| j                   j                  }
||n| j                   j                  }|rdnd}|rdnd}|rdnd}|rdnd}|
rdnd}||t        d      ||n| j                   j                  }|	r|	nd}	|j                         }| j                  j                  |      }|r||fz  }|0t        j                  |t        j                  |j                        }| j                  j                  ||      j                  |j                        }t        | j                  j                  j                         | j                   j"                  z
  dz   }| j                  j                  j                   d| D ]  } |||      d   }|s||fz  } |K| j$                  j&                  j)                  |j+                  | j$                  j,                        |      }n|j/                  ddd	      }|r||fz  }| j$                  j&                  j0                  j2                  d| D ]  } ||      }|s||fz  } | j$                  j&                  j5                  |j+                  | j$                  j,                              }| j7                  |      }| j9                  t        j:                  dt        j                  |j                              j=                  |      }| j?                  ||z         }| jA                  |      }| j9                  t        jB                  d
|	t        j                  |j                              j=                  |      }||z   }| jE                  |      }t        j                  |j                  d      |j                  d      ft        j                  |j                        }| j                  j                  ||j                               j                  |j                        } | jF                  d   |||||
      } | d   }! | jH                  d   |||||
      }"|"d   }#|r||!|#ffz  }|
r|| d   |"d   ffz  }d}$tK        |t        | j                  j                  j                               D ]r  }% | j                  j                  j                   |%   ||      d   } | j$                  j&                  j0                  j2                  |%   |      j+                  | j$                  j,                        }| jA                  | j$                  j&                  j5                  |            |z   }| jL                  |$   }&| jN                  |$   }' |&| j7                  |      |z   |!|      }( |'||#|      }) | jF                  |$dz      |(|)|||
      } | d   }! | jH                  |$dz      |)|(|||
      }"|"d   }#|$dz  }$|r||fz  }||fz  }||!|#ffz  }|
se|| d   |"d   ffz  }u |!|#}+}*| jQ                  |*|+      },|r|||f}|stS        d |*|+|,||fD              S tU        |*|+|,||      S )a  
        image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
            Optionally, instead of passing `pixel_values`, you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `pixel_values` into patch embeddings.
        image_token_type_idx (`int`, *optional*):
            - The token type ids for images.
        output_hidden_states (`bool`, *optional*):
            If set to `True`, hidden states are returned as a list containing the hidden states of text, image, and
            cross-modal components respectively. i.e. `(hidden_states_text, hidden_states_image,
            hidden_states_cross_modal)` where each element is a list of the hidden states of the corresponding
            modality. `hidden_states_txt/img` are a list of tensors corresponding to unimodal hidden states and
            `hidden_states_cross_modal` is a list of tuples containing `cross_modal_text_hidden_states` and
            `cross_modal_image_hidden_states` of each brdige layer.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels are currently not supported.

        Examples:

        ```python
        >>> from transformers import BridgeTowerProcessor, BridgeTowerModel
        >>> from PIL import Image
        >>> import requests

        >>> # prepare image and text
        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)
        >>> text = "hello world"
        >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-base")
        >>> model = BridgeTowerModel.from_pretrained("BridgeTower/bridgetower-base")

        >>> inputs = processor(image, text, return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> outputs.keys()
        odict_keys(['text_features', 'image_features', 'pooler_output'])
        ```Nr2   zYBridgeTowerModel does not use `inputs_embeds`.  Make sure to pass in `input_ids` instead.r   )r  rZ   r   r   r   r   )rX   rr  r)  c              3   $   K   | ]  }|| 
 y wrc   r2   r  s     r4   r  z+BridgeTowerModel.forward.<locals>.<genexpr>  s      = r  )r%   r&   r'   r(   r)   )+rU   r)  r  r   r  r   r  r   r.   r  r5  r\   r  r_   r\  r  rf   rn   r  r  r   r  r[   r   r   rr   r   r
  r  r  	expand_asr  r  fullr  r  r  rq   r  r  get_cls_featuresr1   r$   )-rT   r  rX   r  r   r  r'  r  r:   r  r)  r  r  r  r   all_hidden_states_textall_hidden_states_imageall_hidden_states_crossr  r  r  r9   extend_text_maskssplit_indexrf   ry   image_embeds_with_lncross_modal_texttext_token_type_embeddingsimage_token_type_embeddingscross_modal_imageextend_image_maskslayer_outputs_textcross_text_featureslayer_outputs_imagecross_image_featureslink_layer_indexr  text_link_towerimage_link_towercross_text_features_cross_image_features_r%   r&   cls_featuress-                                                r4   rg   zBridgeTowerModel.forward  s   j 2C1N-TXT_T_TqTq$8$D $++JjJj 	 (<(<"$(<"$"6BD$5b4$):%k  &1%<k$++B]B]7K3QRnn&oo0090E"{n4"!"ZZ5::iN^N^_N OOGGXcdgg

 $//117784;;;X;XX[\\ __,,22<K@ 	9E->?BK#&;.8&		9 ,,33??!!$"3"3"9"9:Um @ L
 (//1a8L#6# &&--99CCL[Q 	;E .L#'L?:'	;
  $0077DD\EVEVW[WhWhWnWnEop  ::;G%)%?%?KKI4D4DE&

)$
% 	#  ::;KNh;hi#??@TU&*&@&@JJt1IL\L\]'

)(
) 	$  46QQ <<=QRZZ##A&(9(>(>q(AB**##


 "__HHU_UdUdUfgjj
 =T99!<,#5/
 13>d;;A>-#4/
  315#)<>R(S'UU#%7%:<OPQ<R$S#UU {C(?(?(E(E$FG 0	ZA:$//1177:;HYZ[\]KL4,,33??II!L\Z__!!''L 001B1B1I1I1V1VWc1de-. !
 #>>?OPO#@@AQR $3//<?YY#!$ 
 %55IK_as$t! "T!=!=>NQR>R!S$%0'9"3" #5Q"7"U$"?"?@PST@T"U%$1'8"3# $7q#9 !#&;.8&'L?:''-@BV,W+YY' #);A)>@STU@V(W'YY#a0	Zf )<=Q~,,]NK!79PRi j 'GXZmn   &')&+*
 	
r3   c                 x    | j                  |      }| j                  |      }t        j                  ||gd      S )Nr   r   )r  r  r.   r   )rT   r%   r&   cls_features_textcls_features_images        r4   r#  z!BridgeTowerModel.get_cls_features  s<     88G!::>Jyy+-?@bIIr3   )NNNNNNNNNNNNNF)r*   r+   r,   rG   r  r  r   r   r.   
LongTensorr/   r   r`   r   r1   rh   r$   rg   r#  ri   rj   s   @r4   r  r    s   6p64  156:594815155948.2,0/3&*-1).j
E,,-j
 !!2!23j
 !!1!12	j

 u001j
 U--.j
 E--.j
   1 12j
 u001j
 'smj
 $D>j
 'tnj
 d^j
 ))*j
 #'j
  
uU\\"$::	;!j
 j
XJr3   r  c                   $     e Zd Z fdZd Z xZS )"BridgeTowerPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y r   )rF   rG   r   rO   rI   r   r   r   r   r
   transform_act_fnrK   rL   rS   s     r4   rG   z+BridgeTowerPredictionHeadTransform.__init__  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STr3   c                 l    | j                  |      }| j                  |      }| j                  |      }|S rc   )r   r@  rK   r   s     r4   rg   z*BridgeTowerPredictionHeadTransform.forward  s4    

=1--m<}5r3   r   rj   s   @r4   r>  r>    s    Ur3   r>  c                   &     e Zd Zd fd	Zd Z xZS )r  c                 p   t         |           || _        t        |      | _        t        j                  |j                  |j                  j                  d      | _
        t        j                  t        j                  |j                  j                              | _        ||| j                  _        y y )NF)r   )rF   rG   rU   r>  	transformr   rO   rI   r  r  decoderr   r.   r  r   r   )rT   rU   r   rV   s      r4   rG   zBridgeTowerMLMHead.__init__  s    ;FCyy!3!3V5G5G5R5RY^_LLV-?-?-J-J!KL	"(DLL r3   c                 d    | j                  |      }| j                  |      | j                  z   }|S rc   )rD  rE  r   )rT   x	mlm_scores      r4   rg   zBridgeTowerMLMHead.forward  s-    NN1%	LL+dii7	r3   rc   r   rj   s   @r4   r  r    s    )r3   r  c                   $     e Zd Z fdZd Z xZS )BridgeTowerITMHeadc                 X    t         |           t        j                  |d      | _        y Nr   rF   rG   r   rO   fc)rT   rI   rV   s     r4   rG   zBridgeTowerITMHead.__init__  s     ))K+r3   c                 (    | j                  |      }|S rc   rN  )rT   rG  	itm_scores      r4   rg   zBridgeTowerITMHead.forward  s    GGAJ	r3   r   rj   s   @r4   rJ  rJ    s    ,r3   rJ  z\
    BridgeTower Model with a language modeling head on top as done during pretraining.
    c                       e Zd ZdgZ fdZd Zd Ze	 	 	 	 	 	 	 	 	 	 	 	 ddee	j                     dee	j                     dee	j                     dee	j                     d	ee	j                     d
ee	j                     dee	j                     dee	j                     dee   dee   dee   dee	j                     deeee	j                     f   fd       Z xZS )BridgeTowerForMaskedLMzmlm_score.decoder.weightc                     t         |   |       t        |      | _        t	        |      | _        | j                          y rc   )rF   rG   r  r  r  rH  r  rS   s     r4   rG   zBridgeTowerForMaskedLM.__init__  s5     +F3+F3 	r3   c                 .    | j                   j                  S rc   rH  rE  r  s    r4   get_output_embeddingsz,BridgeTowerForMaskedLM.get_output_embeddings  s    ~~%%%r3   c                 &    || j                   _        y rc   rV  )rT   new_embeddingss     r4   set_output_embeddingsz,BridgeTowerForMaskedLM.set_output_embeddings  s    !/r3   r  rX   r  r   r  r'  r  r:   r)  r  r  r  r   c                    ||n| j                   j                  }| j                  |||||||||	|
|      }| j                  |r|j                  n|d         }d}|kt               }|j                  |j                        } ||j                  d| j                   j                  j                        |j                  d            }|st        |      }||f|z   S |S t        |||j                  |j                        S )a  
        image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
            Optionally, instead of passing `pixel_values`, you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `pixel_values` into patch embeddings.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Examples:

        ```python
        >>> from transformers import BridgeTowerProcessor, BridgeTowerForMaskedLM
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000360943.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
        >>> text = "a <mask> looking out of the window"

        >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-base-itm-mlm")
        >>> model = BridgeTowerForMaskedLM.from_pretrained("BridgeTower/bridgetower-base-itm-mlm")

        >>> # prepare inputs
        >>> encoding = processor(image, text, return_tensors="pt")

        >>> # forward pass
        >>> outputs = model(**encoding)

        >>> results = processor.decode(outputs.logits.argmax(dim=-1).squeeze(0).tolist())

        >>> print(results)
        .a cat looking out of the window.
        ```N
rX   r  r   r  r'  r  r:   r)  r  r  r   r   r7   r8   r(   r)   )rU   r  r  rH  r%   r   r_   r\   r   r  r  r1   r   r(   r)   )rT   r  rX   r  r   r  r'  r  r:   r)  r  r  r  re  
mlm_logitsmasked_lm_lossloss_fctrX  s                     r4   rg   zBridgeTowerForMaskedLM.forward  s   d &1%<k$++B]B]""))%!'%/!5# # 
 ^^[G$9$9gVWjY
')HYYz001F%joob$++:Q:Q:\:\&]_e_j_jkm_noN:&F3A3M^%.YSYY!//))	
 	
r3   NNNNNNNNNNNN)r*   r+   r,   _tied_weights_keysrG   rW  rZ  r   r   r.   r<  r/   r`   r   r   r1   rg   ri   rj   s   @r4   rS  rS    sj    55&0  156:594815155948,0/3&*-1Q
E,,-Q
 !!2!23Q
 !!1!12	Q

 u001Q
 U--.Q
 E--.Q
   1 12Q
 u001Q
 $D>Q
 'tnQ
 d^Q
 ))*Q
 
~uU%6%677	8Q
 Q
r3   rS  z
    BridgeTower Model transformer with a classifier head on top (a linear layer on top of the final hidden state of the
    [CLS] token) for image-to-text matching.
    c                       e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   deej                     de
eeej                     f   fd       Z xZS )#BridgeTowerForImageAndTextRetrievalc                     t         |   |       t        |      | _        t	        |j
                  dz        | _        | j                          y rL  )rF   rG   r  r  rJ  rI   rQ  r  rS   s     r4   rG   z,BridgeTowerForImageAndTextRetrieval.__init__U  s@     +F3+F,>,>,BC 	r3   r  rX   r  r   r  r'  r  r:   r)  r  r  r  r   c                    ||n| j                   j                  }| j                  |||||||||	|
|      }|r|j                  n|d   }| j	                  |      }d}|.t               }|j                  |j                        } |||      }|st        |      }||f|z   S |S t        |||j                  |j                        S )a  
        image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
            Optionally, instead of passing `pixel_values`, you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `pixel_values` into patch embeddings.
        labels (`torch.LongTensor` of shape `(batch_size, 1)`, *optional*):
            Labels for computing the image-text matching loss. 0 means the pairs don't match and 1 means they match.
            The pairs with 0 will be skipped for calculation.

        Examples:

        ```python
        >>> from transformers import BridgeTowerProcessor, BridgeTowerForImageAndTextRetrieval
        >>> import requests
        >>> from PIL import Image

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)
        >>> texts = ["An image of two cats chilling on a couch", "A football player scoring a goal"]

        >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-base-itm-mlm")
        >>> model = BridgeTowerForImageAndTextRetrieval.from_pretrained("BridgeTower/bridgetower-base-itm-mlm")

        >>> # forward pass
        >>> scores = dict()
        >>> for text in texts:
        ...     # prepare inputs
        ...     encoding = processor(image, text, return_tensors="pt")
        ...     outputs = model(**encoding)
        ...     scores[text] = outputs.logits[0, 1].item()
        ```Nr\  r   r]  )rU   r  r  r'   rQ  r   r_   r\   r1   r   r(   r)   )rT   r  rX   r  r   r  r'  r  r:   r)  r  r  r  re  r'   r8   itm_lossr`  rX  s                      r4   rg   z+BridgeTowerForImageAndTextRetrieval.forward_  s    \ &1%<k$++B]B]""))%!'%/!5# # 
 2=--'!*.')HYYv}}-F/H6]F-5-AXK&(MvM'!//))	
 	
r3   ra  )r*   r+   r,   rG   r   r   r.   r<  r/   r`   r   r   r1   rg   ri   rj   s   @r4   rd  rd  N  sV     156:594815155948,0/3&*-1Q
E,,-Q
 !!2!23Q
 !!1!12	Q

 u001Q
 U--.Q
 E--.Q
   1 12Q
 u001Q
 $D>Q
 'tnQ
 d^Q
 ))*Q
 
'u/@/@)AA	BQ
 Q
r3   rd  c                   $     e Zd Z fdZd Z xZS )BridgeTowerContrastiveHeadc                 X    t         |           t        j                  ||      | _        y rc   rM  )rT   rI   
embed_sizerV   s      r4   rG   z#BridgeTowerContrastiveHead.__init__  s     ))K4r3   c                 (    | j                  |      }|S rc   rP  )rT   rG  s     r4   rg   z"BridgeTowerContrastiveHead.forward  s    GGAJr3   r   rj   s   @r4   ri  ri    s    5r3   ri  zl
    BridgeTower Model with a image-text contrastive head on top computing image-text contrastive loss.
    c                       e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   dee	   de
eeej                     f   fd       Z xZS )r  c                    t         |   |       t        |      | _        t	        |j
                  |j                        | _        t	        |j
                  |j                        | _        t	        |j
                  dz  |j                        | _	        t        j                  t        j                  | j                  j                              | _        | j#                          y rL  )rF   rG   r  r  ri  rI   contrastive_hidden_sizeitc_text_headitc_image_headitc_cross_modal_headr   r   r.   r   rU   r  r  r  rS   s     r4   rG   z*BridgeTowerForContrastiveLearning.__init__  s     +F378J8JFLjLjk89K9KVMkMkl$>v?Q?QTU?UW]WuWu$v!<<T[[5W5W(XYr3   r  rX   r  r   r  r'  r  r:   r)  r  r  return_lossr   c                 H   ||n| j                   j                  }| j                  |||||||||	d|      }|r|j                  n|d   }|r|j                  n|d   \  }}}|d   }|d   }| j                  j
                  j                  j                  |      }| j                  j                  t        j                  ddt        j                  | j                  j                  j                  j                  	            j                  |      }| j                  j                  |      |z   }t         j"                  j%                  | j'                  |ddd
ddf         dd      }t         j"                  j%                  | j)                  |ddd
ddf         dd      j+                  |j                        }t         j"                  j%                  | j-                  |      dd      j+                  |j                        }t        j.                  |||gd      }| j0                  j3                         j+                  |j                        }t        j4                  ||j7                               |z  }t        j4                  ||j7                               |z  }t        j4                  ||j7                               |z  }d}|rt        j8                  t;        |      |j                        }t         j"                  j=                  ||      }t         j"                  j=                  ||      }t         j"                  j=                  ||      }||z   |z   dz  }|s||||f|dd z   } ||f| z   S | S t?        ||||||j                  |j@                        S )a  
        image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
            Optionally, instead of passing `pixel_values`, you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `pixel_values` into patch embeddings.
        return_loss (`bool`, *optional*):
            Whether or not to return the contrastive loss.

        Examples:

        ```python
        >>> from transformers import BridgeTowerProcessor, BridgeTowerForContrastiveLearning
        >>> import requests
        >>> from PIL import Image
        >>> import torch

        >>> image_urls = [
        ...     "https://farm4.staticflickr.com/3395/3428278415_81c3e27f15_z.jpg",
        ...     "http://images.cocodataset.org/val2017/000000039769.jpg",
        ... ]
        >>> texts = ["two dogs in a car", "two cats sleeping on a couch"]
        >>> images = [Image.open(requests.get(url, stream=True).raw) for url in image_urls]

        >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc")
        >>> model = BridgeTowerForContrastiveLearning.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc")

        >>> inputs = processor(images, texts, padding=True, return_tensors="pt")
        >>> loss = model(**inputs, return_loss=True).loss

        >>> inputs = processor(images, texts[::-1], padding=True, return_tensors="pt")
        >>> loss_swapped = model(**inputs, return_loss=True).loss

        >>> print("Loss", round(loss.item(), 4))
        Loss 0.0019

        >>> print("Loss with swapped images", round(loss_swapped.item(), 4))
        Loss with swapped images 2.126
        ```NTr\  r   r	   r   r  r   rZ   r   )r   pr  r,  r   g      @)r7   r8   r9   r:   r;   r(   r)   )!rU   r  r  r'   r(   r  r  r   r  r.   r"  r5  r   r\   r!  r  r   r   	normalizerp  rq  r_   rr  r   r  expr4  tr   r\  cross_entropyr6   r)   )!rT   r  rX   r  r   r  r'  r  r:   r)  r  r  rs  re  r'   hidden_states_txthidden_states_imghidden_states_cross_modalr9   r)  r,  r;   r8   r  logits_text_to_imagelogits_text_to_crosslogits_image_to_crossitc_lossr  text_to_image_losstext_to_cross_lossimage_to_cross_lossrX  s!                                    r4   rg   z)BridgeTowerForContrastiveLearning.forward  sz   j &1%<k$++B]B]""))%!'%/!%# # 
 2=--'!*%0G!!gaj 	H,.G (+(,#//<<CCPPQ]^&*&6&6&L&LJJtQejj9I9I9_9_9f9f9m9mn'

)(
) 	$ ''CCDXY\ww mm--d.@.@QPQSTWAU.V\^bc-d}}..t/B/B<PQSTVWPWCX/Y_aef.gjj%% k 
 }}..t/H/H/W]_cd.ehh%% i 
 k<FBO&&**,//{7I7I/J$||K9IJ[X$||K9IJ[X %\<>>;K L{ Z\\#f+fmmDF!#!<!<=QSY!Z!#!<!<=QSY!Z"$--"="=>SU["\*-??BUUY\\Hk<FQRQSTF-5-AXK&(MvM+#%%!//))
 	
r3   )NNNNNNNNNTNN)r*   r+   r,   rG   r   r   r.   r<  r/   r`   r   r6   r1   rg   ri   rj   s   @r4   r  r    sO     156:594815155948,0/3&*&*x
E,,-x
 !!2!23x
 !!1!12	x

 u001x
 U--.x
 E--.x
   1 12x
 u001x
 $D>x
 'tnx
 d^x
 d^x
 
+U53D3D-EE	Fx
 x
r3   r  )r  rd  rS  r  r  )r   )Qr-   r7  collectionsr   dataclassesr   typingr   r   r.   torch.utils.checkpointr   torch.nnr   activationsr
   r   cache_utilsr   r   r   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   utils.deprecationr   configuration_bridgetowerr   r   r    
get_loggerr*   r  _TOKENIZER_FOR_DOCr$   r6   r  r=   rl   r{   r   r   r   r   r   r  r  rV  rS  rg  r|  r  r  r  r  r  r  r  r>  r  rJ  rS  rd  ri  r  __all__r2   r3   r4   <module>r     s$      # ! "    % 6 C C 9  . l l 7 7 0 h h 
		H	%'  
:[ : :$ 
:; : :4)299 )XRYY 6P")) Pf7"299 7"td299 d4BII bii  		 		  A.ryy A.J %' #3299 3l=		 =@G5 GVY
RYY Y
zV=		 V=t4  % % %DY7 Y U
5 U
U
p 
oJ1 oJ
oJf	 "    
d
7 d

d
N ]
*D ]
]
@  
G
(B G

G
Tr3   