o
    i+U                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZmZ g dg dg dg dgZejd	d
dg dg dg dg dgdfddg dg dg dg dgdfddg dg dg dg dgdfd
dg dg dg dg dgg dfd
dg dg dg dg dgg dfddg dg dg dg dgg dfddg dg dg dg dgg dfddg dg dg dg dgg dfddg dg dg dg dgg dfddg dg dg dg dgg dfg
dd Zdd Zdd Zejd	d
dg dg dg dg dgdfddg dg dg dg dgdfdd g dg dg dg dgdfddg dg dg dg dgdfddg dg dg dg dgg dfddg dg dg dg dgg d!fddg dg dg dg dgg d"fddg dg dg dg dgg dfgd#d$ Zejd%d&d' Zejd(ddgd)d* Zejd(g d+d,d- Zd.d/ Zejd0ed1d2d3d4 Zd5d6 Z ejd7d
dg d8g d9g d:fddg d8g d;g d<fddg d=g d;g d>fgd?d@ Z!ejdAd
g dBg dCg dDg dEgdfdg dFg dFg dGg dHgdfdg dIg dJg dKg dKgdfgejdLg dMdNdO Z"ejd(g d+dPdQ Z#dRdS Z$ejdTdg dUdfdg dUdfgdVdW Z%dXdY Z&ejdZej'ej(ej)gejd[dej(ej)gejdLg dMd\d] Z*ejd^ej'ej(ej)gejdLg dMd_d` Z+dadb Z,ejdcdddedf edgD fdhdidf edgD fdjdkdf edgD fgdldm Z-ejd(g d+dndo Z.dpdq Z/drds Z0dS )t    N)clone)KBinsDiscretizerOneHotEncoder)assert_allcloseassert_allclose_dense_sparseassert_array_almost_equalassert_array_equalignore_warnings      ?)r         @      )r   g      @r         ?)   g      @r      z2strategy, quantile_method, expected, sample_weightuniformwarn)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   kmeans)r   r   r   r   quantileaveraged_inverted_cdf)r   r   r   r   )r   r   r   r   )r   r      r   c                 C   s\   t dd| |d}ttd |jt|d W d    n1 sw   Y  t|t| d S )Nr   ordinaln_binsencodestrategyquantile_method)categorysample_weight)r   r	   UserWarningfitXr   	transform)r   r    expectedr#   est r*   ~/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/preprocessing/tests/test_discretization.pytest_fit_transform   s   Br,   c                   C   sX   t dddt t tdgd ddt t dddtjjttks*J d S )Nr   r   r   r    r   )	r   fit_transformr&   nparrayr%   n_bins_dtypeintr*   r*   r*   r+   test_valid_n_bins_   s   r4   c                  C   sJ  t dd} t| dd}d}tjt|d |t W d    n1 s%w   Y  g d} t| dd}d}tjt|d |t W d    n1 sMw   Y  g d} t| dd}d	}tjt|d |t W d    n1 suw   Y  g d
} t| dd}d}tjt|d |t W d    d S 1 sw   Y  d S )N)r             @r   r-   z:n_bins must be a scalar or array of shape \(n_features,\).match)r   r   r   r   r   r   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.) @r   r:   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r/   fullr   pytestraises
ValueErrorr.   r&   )r   r)   err_msgr*   r*   r+   test_invalid_n_bins_arrayi   s4   "r@   )r   r   r   r   r9   )r   r   r   r   linear)r   r   r   r   )r   r   r   r   c                 C   s   t g dd| |djt|d}t|t| ttjd }|jj|fks(J t	|j|j
D ]\}}|j|d fks=J q/d S )Nr   r   r   r   r   r   r"   r   )r   r%   r&   r   r'   r/   r0   shape
bin_edges_zipr1   )r   r    r(   r#   r)   Z
n_features	bin_edgesr   r*   r*   r+   test_fit_transform_n_bins_array   s   6rG   z&ignore: Bins whose width are too smallc                  C   s   t dgdgdgdgdgdgg} tddd	d
d}|j| g dd t|jd g d t|| dgdgdgdgdgdgg dS )z;Check the impact of `sample_weight` one computed quantiles.r   r   r   r   i  i  
   r   r   r   r   )r   r   r   r   r   r   r"   r   )r   r   r   r   r           g      ?      @N)r/   r0   r   r%   r   rD   r'   r&   r)   r*   r*   r+   *test_kbinsdiscretizer_effect_sample_weight   s   ",rL   r   c                 C   sb   | dkrt dd| dd}nt dd| d}tjg dtjd}t|}|jt|d	 t|| d
S )z7Make sure that `sample_weight` is not changed in place.r   r   r   r   r   )r   r   r   )r   r   r   r   r2   r"   N)r   r/   r0   float64copyr%   r&   r   )r   r)   r#   Zsample_weight_copyr*   r*   r+   /test_kbinsdiscretizer_no_mutating_sample_weight   s   
rP   )r   r   r   c                 C   s   t d tddgddgddgddgg}| dkr#t| ddd	d
}nt| ddd}d}tjt|d || W d    n1 sCw   Y  |j	d dksQJ |
|}t|d d df t|jd  d S )Nalwaysr   r   r   r   r   r   r   r   )r   r   r   r    )r   r   r   z2Feature 0 is constant and will be replaced with 0.r7   )warningssimplefilterr/   r0   r   r<   warnsr$   r%   r1   r'   r   ZzerosrC   )r   r&   r)   warning_messageXtr*   r*   r+   test_same_min_max   s"   
"
&rW   c                  C   s   t d} tddd}tt ||  W d    n1 s w   Y  tddd}|| dd tt ||  W d    d S 1 sJw   Y  d S )Nr5   r   r   r-   r   r   )	r/   aranger   r<   r=   r>   r%   reshaper'   rK   r*   r*   r+   test_transform_1d_behavior  s   
"rZ   ir   	   c                 C   sZ   t g ddd}t g ddd}|d|   }tdddd	|}t|| d S )
N)r6         @g      @g       @g      $@r   r   )r   r   r   r   r   rH   r   r   r   r   r   r    )r/   r0   rY   r   r.   r   )r[   ZX_initZXt_expectedr&   rV   r*   r*   r+   test_numeric_stability  s   r_   c                  C   s   t g ddddt} | t}t g ddddt} | t}t|r)J ttdd dD dd	|| t g dd
ddt} | t}t|sRJ ttdd dD dd	|	 |	  d S )NrB   r   r   r^   onehot-densec                 S      g | ]}t |qS r*   r/   rX   .0r[   r*   r*   r+   
<listcomp>5      z'test_encode_options.<locals>.<listcomp>F)
categoriesZsparse_outputonehotc                 S   ra   r*   rb   rc   r*   r*   r+   re   @  rf   T)
r   r%   r&   r'   spissparser   r   r.   Ztoarray)r)   ZXt_1ZXt_2ZXt_3r*   r*   r+   test_encode_options'  sJ   




rk   zIstrategy, quantile_method, expected_2bins, expected_3bins, expected_5bins)r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r5   r5   )r   r   r   r   r   r   )r   r   r   r   r   r5   )r   r   r   r   r   r   )r   r   r   r   r5   r5   c                 C   s   t g ddd}td| |dd}||}t||  td| |dd}||}t||  td| |dd}||}t||  d S )	N)r   r   r   r   r\   rH   r   r   r   r   r   r   r    r   r      )r/   r0   rY   r   r.   r   Zravel)r   r    Zexpected_2binsZexpected_3binsZexpected_5binsr&   r)   rV   r*   r*   r+   test_nonuniform_strategiesH  s    


rn   z&strategy, expected_inv,quantile_method)      r6         r   )r   rJ         r   )r   r]   ro   r   )r   r]   ro   r   )g      g      @g      g      )g      g      @g      g      ?)g      ?g      @g      g      ?)ro   r6   rp   g      )r   rJ   rq   rI   )r   r]   ro   g      ?r   )r   rh   r`   c                 C   s2   t d| ||d}|t}||}t|| d S )Nr   rl   )r   r.   r&   inverse_transformr   )r   r   Zexpected_invr    kbdrV   Xinvr*   r*   r+   test_inverse_transformq  s   %

ru   c                 C   s   t g dd d d f }| dkrtd| ddd}ntd| dd}|| t dd	gd d d f }||}t|jd
dd |j t|jd
dd
g d S )Nr   r   r   r   r   r5   r   r   )r   r   r   r    )r   r   r   r   rm   r   )Zaxisr   )	r/   r0   r   r%   r'   r   maxr1   min)r   r&   rs   ZX2ZX2tr*   r*   r+    test_transform_outside_fit_range  s   

ry   c                  C   s   t g dd d d f } |  }tdddd}|| }t| | | }||}t|| t|t dgdgdgdgg d S )	Nrv   r   r   r   )r   r    r   r   r   r   )r/   r0   rO   r   r.   r   rr   )r&   ZX_beforer)   rV   Z	Xt_beforert   r*   r*   r+   test_overwrite  s   



$rz   z-strategy, expected_bin_edges, quantile_method)r   r   r   c                 C   s|   dgdgdgdgdgdgg}t d| |d d}d}tjt|d || W d    n1 s/w   Y  t|jd | d S )Nr   r   )r   r   r    	subsample'Consider decreasing the number of bins.r7   )r   r<   rT   r$   r%   r   rD   )r   Zexpected_bin_edgesr    r&   rs   rU   r*   r*   r+   test_redundant_bins  s   r}   c                  C   s   t g ddd} t g d}t g ddd}tdddd	d
}d}tjt|d ||  W d    n1 s>w   Y  t|j	d | t|
| | d S )N)皙?r~   ffffff?r   r   )r~   gq=
ףp?g=
ףp=?gzG?gp=
ף?r   )r   r   r5   rH   r   r   rA   r   r|   r7   r   )r/   r0   rY   r   r<   rT   r$   r%   r   rD   r'   )r&   rF   rV   rs   rU   r*   r*   r+   !test_percentile_numeric_stability  s   	r   in_dtype	out_dtypec                 C   st   t jt| d}td|d|d}|| |d ur|}n|d u r)|jt jkr)t j}n|j}||}|j|ks8J d S NrM   r   r   )r   r   r    r2   )	r/   r0   r&   r   r%   r2   float16rN   r'   )r   r   r   X_inputrs   Zexpected_dtyperV   r*   r*   r+   test_consistent_dtype  s   

r   input_dtypec                 C   sh   t jt| d}td|dt jd}|| ||}td|dt jd}|| ||}t|| d S r   )	r/   r0   r&   r   float32r%   r'   rN   r   )r   r   r   Zkbd_32ZXt_32Zkbd_64ZXt_64r*   r*   r+   test_32_equal_64  s$   



r   c                  C   s   t g ddd} tddddd}||  t|}|jd d	 ||  t|jd
 |jd
 D ]\}}t j	
|| q2|jj|jjksHJ d S )Nr
   r   r   rH   r   r   r   r   r{   r   )r/   r0   rY   r   r%   r   
set_paramsrE   rD   testingr   rC   )r&   Zkbd_defaultZkbd_without_subsamplingZbin_kbd_defaultZbin_kbd_with_subsamplingr*   r*   r+   'test_kbinsdiscretizer_subsample_default"  s    

r   zencode, expected_namesrh   c                 C   .   g | ]}t d D ]}d| dt| qqS r5   feat_rangefloatrd   col_idZbin_idr*   r*   r+   re   =      re   r   r`   c                 C   r   r   r   r   r*   r*   r+   re   E  r   r   c                 C      g | ]}d | qS r   r*   )rd   r   r*   r*   r+   re   K  rf   c                 C   s|   g dg dg dg dg}t d| dd|}||}dd	 td
D }||}|jd |jd ks7J t|| dS )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    r   r   r   r   r   r   r   r   r   r   r5   r   r5   r   r^   c                 S   r   r   r*   rc   r*   r*   r+   re   Y  rf   z>test_kbinsdiscrtizer_get_feature_names_out.<locals>.<listcomp>r   r   r   N)r   r%   r'   r   Zget_feature_names_outrC   r   )r   Zexpected_namesr&   rs   rV   Zinput_featuresZoutput_namesr*   r*   r+   *test_kbinsdiscrtizer_get_feature_names_out8  s   

r   c                 C   s   t j|dd }| dkrt| d|dd}nt| d|d}|| t|}|jd d || t|j	d	 |j	d	 d
d d S )N)i r   r   r   iP  r   )r   r{   random_stater    )r   r{   r   r   r   g{Gz?)Zrtol)
r/   randomZRandomStateZrandom_sampler   r%   r   r   r   rD   )r   Zglobal_random_seedr&   Zkbd_subsamplingZkbd_no_subsamplingr*   r*   r+   test_kbinsdiscretizer_subsample`  s$   


r   c                  C   s`   g dg dg dg dg} t jtdd tdd|  W d    d S 1 s)w   Y  d S )	Nr   r   r   r   a%  The current default behavior, quantile_method='linear', will be changed to quantile_method='averaged_inverted_cdf' in scikit-learn version 1.9 to naturally support sample weight equivalence properties by default. Pass quantile_method='averaged_inverted_cdf' explicitly to silence this warning.r7   r   )r   )r<   rT   FutureWarningr   r%   )r&   r*   r*   r+   $test_quantile_method_future_warnings}  s   	"r   c                  C   sn   g dg dg dg dg} d}t jt|d tddd	j| g d
d W d    d S 1 s0w   Y  d S )Nr   r   r   r   zWhen fitting with strategy='quantile' and sample weights, quantile_method should either be set to 'averaged_inverted_cdf' or 'inverted_cdf', got quantile_method='linear' instead.r7   r   rA   )r   r    )r   r   r   r   r"   )r<   r=   r>   r   r%   )r&   Zexpected_msgr*   r*   r+   /test_invalid_quantile_method_with_sample_weight  s   "r   )1rR   numpyr/   r<   Zscipy.sparsesparseri   Zsklearnr   Zsklearn.preprocessingr   r   Zsklearn.utils._testingr   r   r   r   r	   r&   markZparametrizer,   r4   r@   rG   filterwarningsrL   rP   rW   rZ   r   r_   rk   rn   ru   ry   rz   r}   r   r   r   rN   r   r   r   r   r   r   r   r*   r*   r*   r+   <module>   s   
A

&
5




!
#	

	

