o
    ir                    @   s  U d Z ddlZddlZddlZddlZddlZddlZddlmZm	Z	m
Z
 ddlZddlZddlZddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+m,Z, ddl-m.Z.m/Z/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z; ddl4m<Z= ddl>m?Z? ddl@mAZAmBZBmCZCmDZDmEZEmFZF ddlGmHZHmIZImJZJmKZK ddlLmMZM dZNdZOe)e+dZPe*e,dZQeR ZSeReTd< eSUeP eSUeQ g dZVeWg dg dg dg dg d g d!g d"g d#g d$g d%g d&g d'g d(g d)g d*g d+g d,g d-g d.g d/g d0g d1g d2gZXg d3ZYg d4ZZd5d6gd6d6gd6d5gd7d7gd7d8gd8d7ggZ[g d9Z\d6d6gd8d8gd:d8ggZ]g d;Z^e_ Z`ejabd7Zcecde`jejfZge`jheg e`_he`jeeg e`_eei ZjecdejjejfZgejjheg ej_hejjeeg ej_eek ZlecdeljejfZgeljheg el_heljeeg el_eeMdZmejndd<d=d>\ZoZpemjqd?d@ZrdAererdBk< emjsddCdDd@Zte'dEd=dFddGu Zve`jhe`jedHejjhejjedHeljheljedHe[e\dHeXeYdHeXeZdHeoepdHeretdHer etdHevetdHewdIetdHdJZxdKdL ZydMdN ZzdOdP Z{ej|}dQeQ~ ej|}dReOdSdT ZdUdV ZdWdX Zej|}dYeQ ej|}dReOdZd[ ZeFej|}dYeQ ej|}d\d]d^e d_fd`dEe d_fdad^e d_fdbd^ed<fgdcdd Zdedf Zdgdh Zdidj Zdkdl Zdmdn Zdodp Zdqdr Zdsdt Zdudv Zdwdx Zdydz ZdKd{d|Zej|}d}eSd~d Zej|}d}eVej|}deJdd Z	dKddZej|}d}eSdd Zej|}d}eVej|}deJdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zej|}d}ePdd Zej|}d}ePdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdKddZej|}deVej|}dddd Zej|}deeeVeQej|}dddgdd Zej|}deVej|}dg dej|}deJdd Zej|}dee
dd eVD eOee
dd eVD eN ej|}dg dej|}deJdd Zej|}deVej|}deeJeKddń ZddǄ Zej|}d}eSddɄ Zej|}d}eSej|}ddgeJ dd̄ Zej|}d}eSdd΄ Zej|}d}eVej|}deKddф Zddӄ Zej|}d}eSddՄ Zej|}d}eSej|}deKddׄ Zddل Zddۄ Zej|}ddgeJ dd݄ Zej|}deeex ddh ej|}de)e+gdd Zej|}dex ej|}de*e,gdd Zdd Zdd Zdd Zej|}d}eSej|}dddgej|}ddgeJ eK dd Zej|}dRg dej|}dQeQ~ dd Zej|}ded:dd Zdd Zej|}dQe)e+gej|}dd8dCgdd Zdd Zdd Zdd Zdd  Zΐdd Zϐdd ZАdd Zѐdd ZҐd	d
 ZӐdd Zej|}dee0~ e1~ dd ZՐdd Zej|}dQeS~ dd Zej|}dRd]dagdd Zej|}ded:ej|}dRd]dagdd Zej|}dRddgdd Zej|}dRddgdd Zej|}dRddgdd Zej|}dRddgd d! Zej|}ddgeK ej|}d"e*d`d#e,d`d#gd$d% Zej|}dQeQ~ d&d' Zߐd(d) Zej|}d*eje*dfeje,d+fee)d,fee+d-fgej|}d.dd/gd0d1 Zej|}d2eeP~ d3d4gd5d6 Zej|}d7eje*feje)fgd8d9 Zd:d; Zej|}dQe*e,gej|}d<eWejd8ejdCd=d>geWejejd:dCd=d>geWd7d8d:dCejejgeWd7d8d:ejd>ejggej|}dRd]dagd?d@ ZdAdB ZdCdD ZdEdF ZdGdH ZdIdJ ZdS (L  z-
Testing for the tree module (sklearn.tree).
    N)chainpairwiseproduct)NumpyPickler)assert_allclose)clonedatasetstree)DummyRegressor)NotFittedError)SimpleImputer)accuracy_scoremean_poisson_deviancemean_squared_error)cross_val_scoretrain_test_split)make_pipeline)_sparse_random_matrix)DecisionTreeClassifierDecisionTreeRegressorExtraTreeClassifierExtraTreeRegressor)CRITERIA_CLFCRITERIA_REGDENSE_SPLITTERSSPARSE_SPLITTERS)_py_sort)
NODE_DTYPE	TREE_LEAFTREE_UNDEFINED_build_pruned_tree_py_check_n_classes_check_node_ndarray_check_value_ndarray)Tree)compute_sample_weight)assert_almost_equalassert_array_almost_equalassert_array_equalcreate_memmap_backed_dataignore_warningsskip_if_32bit)	_IS_32BITCOO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERS)check_random_state)ginilog_loss)squared_errorabsolute_errorfriedman_msepoisson)r   r   )r   r   	ALL_TREES)r   r      r   r   r      ir   r   r   r   r   )r   r         r   r:   r   r   r9   皙?r   r8   r9   )r?   r   r         r   r    @r9   r   r   r@   r   r9   )r?   r?   r   g333333r   r   r   r   r   r   r>   r   r   r9   )r?   r?   r   r   r   r   r   r<   r   r   r   r   r   r9   )r?   r   r8   
   r8   r   皙	r   r8   r<   r:   r9   )zG @r         r      r   r   rE            ?r   rC   r9   )rF   r   rG   rH   r   rI   r   r   rE   rJ   r   r   rB   r9   )rF      rG   rH   r   rI   r   r   rE   rJ   r   r   rB   r9   )rF   rL   rG   rH   r   rI   r   r   rE   rJ   rK   r   r?   r   )   rL   r;   r9   rK   r:   rD   r   r9   r=   r<   r   rM   r   )rM   r   r9   r9   r9   r?   r9   r   r   rB   r<   r   r9   r   )rM   r   r9   rM   r<   r?   rD   rM   r   r?   r9   rM   rM   r   )r9   r9   r   rM   rM   r?   r9   rM   r   r=   r9   rM   r<   r   )r<   r9   r   r<   r   r:   rD   r   r9   r=   r<   r   r<   r9   )rF   rL   rG   rH   r   r9   r   r   rE   rJ   rK   r   rC   r9   )rF   rL   rG   rH   r   r9   r   r   rE   rJ         ?r9   r?   r?   )rF   rL   rG   rH   r   rD   r   r   rE   rJ   rK   r   r?   r?   )rM   r   r;   r9   rK   rB   rD   r   r9   r=   r<   r9   r   r?   )rM   r   r9   r9   r9   rB   r9   r   r   rB   r   r   r   r9   )rM   r9   r9   r9   rM   r?   rD   rM   r   r?   r   rM   r9   r9   )r9   r9   r   r   r9   rC   r9   rM   r   r=   r9   rM   r9   r9   )r<   r9   r   r9   r   r:   r9   r   r9   rB   r   r   r9   r   )r9   r9   r   r   r   r   r9   r9   r9   r9   r9   r9   r   r   r   r9   r   r   r9   r   r   r   r   )      ?rA   333333?皙?rD   g333333@@g)\(?{Gz?gףp=
@rR   g?        rP   rM   rI   r   r         @g|?5^?g(\??r   rB   r?   r9   rM   )r?   r?   r?   r9   r9   r9   r<   )r?   r9   r9      rD   )random_state	n_samples
n_features)   r;   sizerT   g?r8   )r[   r[   g      ?)ZdensityrX   Xy)r[   r<   )irisdiabetesdigitstoy	clf_small	reg_small
multilabel
sparse-pos
sparse-neg
sparse-mixzerosc                 C   s   |j | j ksJ d||j | j t| j|j|d  t| j|j|d  | jtk}t|}t| j| |j| |d  t| j	| |j	| |d  t| j
 |j
 |d  t| j
|j
|d  t| j|j|d d	 t| j| |j| |d
 d	 d S )Nz({0}: inequal number of node ({1} != {2})z: inequal children_rightz: inequal children_leftz: inequal featuresz: inequal thresholdz: inequal sum(n_node_samples)z: inequal n_node_samplesz: inequal impurityerr_msgz: inequal value)
node_countformatr(   children_rightchildren_leftr   npZlogical_notfeature	thresholdn_node_samplessumr&   impurityr'   value)dsmessageZexternalinternal r}   k/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/tree/tests/test_tree.pyassert_tree_equal   s>   



r   c                  C   st   t  D ]3\} }|dd}|tt t|ttd	|  |ddd}|tt t|ttd	|  qd S )Nr   rX   Failed with {0}r9   )max_featuresrX   )
	CLF_TREESitemsfitr_   r`   r(   predictTtrue_resultro   namer$   clfr}   r}   r~   test_classification_toy   s   
r   c                  C   s   t  D ]<\} }|dd}|jtttttd t|	t
td|  |jtttttdd t|	t
td|  qd S )Nr   r   sample_weightr   rK   )r   r   r   r_   r`   rr   oneslenr(   r   r   r   ro   fullr   r}   r}   r~    test_weighted_classification_toy   s   
r   r$   	criterionc                 C   s   |dkrt t td }t t| }t t| }nt}t}| |dd}|t| t|	t
| | |ddd}|t| t|	t
| d S )Nr6   r9   r   rX   r   r   rX   )rr   absminr`   arrayr   r   r_   r   r   r   )r$   r   ay_trainy_testregr   r}   r}   r~   test_regression_toy  s   r   c                  C   s   t d} d| d dd df< d| dd dd f< t | j\}}t | | gj}|  } t D ]9\}}|dd}|	||  |
|| dksSJ d||ddd}|	||  |
|| dksnJ d|q5d S )	N)rD   rD   r9   r;   r   r   rO   r   rX   r   )rr   rk   indicesshapevstackravelr   r   r   r   scorero   )r`   ZgridxZgridyr_   r   r$   r   r}   r}   r~   test_xor  s   

 r   c                  C   s   t t tD ]Q\\} }}||dd}|tjtj t|	tjtj}|dks2J d
| ||||ddd}|tjtj t|	tjtj}|dksXJ d
| ||qd S )Nr   r   rV   z0Failed with {0}, criterion = {1} and score = {2}rM   r   rK   )r   r   r   CLF_CRITERIONSr   ra   datatargetr   r   ro   )r   r$   r   r   r   r}   r}   r~   	test_iris3  s   r   z
name, Treec                 C   s\   ||dd}| tjtj ttj|tj}|tdks,J d|  d| d| d S )Nr   r   zFailed with z, criterion = z and score = )r   rb   r   r   r   r   pytestapprox)r   r$   r   r   r   r}   r}   r~   test_diabetes_overfitE  s   r   z&criterion, max_depth, metric, max_lossr3      <   r4   r5   r6   c                 C   sT   |||ddd}| tjtj |tj|tj}d|  k r%|k s(J  J d S )NrJ   r   )r   	max_depthr   rX   )r   rb   r   r   r   )r   r$   r   r   ZmetricZmax_lossr   Zlossr}   r}   r~   test_diabetes_underfitR  s    r   c                  C   s   t  D ]V\} }|dddd}|tjtj |tj}tt	|dt
tjjd d| d tt|d|tjd| d t|tjt|tjdd| d qd S )Nr9   *   r   r   rX   r   r   rl   rL   )r   r   r   ra   r   r   predict_probar'   rr   rv   r   r   ro   r(   Zargmaxr   r&   exppredict_log_proba)r   r$   r   Zprob_predictr}   r}   r~   test_probabilityg  s*   



r   c                  C   sP   t dd d t jf } t d}t D ]\}}|d dd}|| | qd S )Ni'  r   r   rX   )rr   arangenewaxis	REG_TREESr   r   r_   r`   r   r$   r   r}   r}   r~   test_arrayrepr  s   
r   c                  C   s   ddgddgddgddgddgddgg} g d}t  D ]\}}|dd}|| | t|| |d|d	 qt D ]\}}|dd}|| | t|| |d|d	 q=d S )
NrB   r?   r9   rM   )r9   r9   r9   r9   r9   r9   r   r   r   rl   )r   r   r   r(   r   ro   r   r&   )r_   r`   r   TreeClassifierr   TreeRegressorr   r}   r}   r~   test_pure_set  s   (

r   c               
   C   s   t g dg dg dg dg dg dg dg} t g d}t jd	d
3 t D ]%\}}|dd}|| | || |  ||  | ||  |  q,W d    d S 1 s]w   Y  d S )N)gs_c@d	a@籛 `8`@?c@)g_9a@g 8`@g-Vu]@g    @Xd@)gSW j_@r   r   r   )g ً`@4Ta@	lKa@{c@)g|@Y@g~G`a@gwI?lKa@g/"c@)g_@r   r   r   )g:^@r   r   r   )rO   gAw?gtQ?5??rT   g7G?gۺ?gb'?raise)allr   r   )rr   r   Zerrstater   r   r   r   r}   r}   r~   test_numerical_stability  s(   
"r   c               	   C   s   t jdddddddd\} }t D ]2\}}|dd}|| | |j}t|dk}|jd dks9J d		||dksDJ d		|qt
dd}|tjtj t
dttjd
}|tjtj t|j|j d S )N  rD   r<   r   FrY   rZ   n_informativen_redundantZ
n_repeatedshufflerX   r   皙?r   rX   max_leaf_nodes)r   make_classificationr   r   r   feature_importances_rr   rv   r   ro   r   ra   r   r   r   r(   )r_   r`   r   r$   r   ZimportancesZn_importantclf2r}   r}   r~   test_importances  s*   



r   c                  C   s@   t  } tt t| d W d    d S 1 sw   Y  d S )Nr   )r   r   raises
ValueErrorgetattrr   r}   r}   r~   test_importances_raises  s   "r   c               	   C   s   t jdddddddd\} }tdddd	| |}td
ddd	| |}t|j|j t|jj	|jj	 t|jj
|jj
 t|jj|jj t|jj|jj d S )Ni  rD   r<   r   Fr   r1   r;   )r   r   rX   r3   )r   r   r   r   r   r&   r   r(   tree_rs   rq   rp   ru   )r_   r`   r   r   r}   r}   r~   )test_importances_gini_equal_squared_error  s,   
r   c                  C   s  t  D ]\} }|dd}|tjtj |jtt	tjj
d ks%J |dd}|tjtj |jtttjj
d ksBJ |dd}|tjtj |jdksVJ |dd}|tjtj |jdksjJ |dd}|tjtj |jdks~J |dd}|tjtj |jtdtjj
d  ksJ |dd}|tjtj |jtjj
d ksJ |d d}|tjtj |jtjj
d ksJ qd S )	Nsqrt)r   r9   log2r<   rS   rK   rO   )r7   r   r   ra   r   r   Zmax_features_intrr   r   r   r   )r   TreeEstimatorestr}   r}   r~   test_max_features  s4   
 
 





r   c            	   	   C   s  t  D ]o\} }| }tt |t W d    n1 s!w   Y  |tt g dg}tt	 || W d    n1 sFw   Y  | }td d }tt	 |t| W d    n1 sjw   Y  t
t}| }||t t|tt | }tt |t W d    n1 sw   Y  |tt t
t}tt	 ||d d dd f  W d    n1 sw   Y  t
tj}| }|t
t|t tt	 |t W d    n1 sw   Y  tt	 |t W d    n	1 sw   Y  | }|tt tt	 || W d    n	1 s6w   Y  tt	 || W d    n	1 sQw   Y  | }tt |t W d    n	1 sow   Y  qtdd}tjt	dd |g dgg d	 W d    n	1 sw   Y  tjt	d
d |g dgg d W d    d S 1 sw   Y  d S )N)rB   r?   r9   r?   r9   r6   r   zy is not positive.*Poissonmatchr   r9   rM   )r   r   r   zSome.*y are negative.*Poisson)r;   grM   )r   r   r   r   r   r   r_   r   r`   r   rr   asfortranarrayr&   r   r   r   asarrayr   dotapplyr   )	r   r   r   X2y2ZXftZXtr   r}   r}   r~   
test_error  sr   



$r   c                  C   s   t jtjtjjd} tj}tdt	
 D ]R\}}t	| }|d|dd}|| | |jj|jjdk }t |dksAJ d||d	|dd}|| | |jj|jjdk }t |dksfJ d|qd
S )z Test min_samples_split parameterdtypeN  rD   r   )min_samples_splitr   rX   r?   	   r   r>   N)rr   r   ra   r   r	   _treeDTYPEr   r   r7   keysr   r   ru   rq   r   ro   )r_   r`   r   r   r   r   Znode_samplesr}   r}   r~   test_min_samples_split_  s"   r   c            	      C   s   t jtjtjjd} tj}tdt	
 D ]`\}}t	| }|d|dd}|| | |j| }t |}||dk }t |dksHJ d||d|dd}|| | |j| }t |}||dk }t |dkstJ d|qd S )	Nr   r   r;   r   )min_samples_leafr   rX   r8   r   r   )rr   r   ra   r   r	   r   r   r   r   r7   r   r   r   r   bincountr   ro   )	r_   r`   r   r   r   r   outZnode_countsZ
leaf_countr}   r}   r~   test_min_samples_leaf~  s*   

r   c                 C   s  t | d tj}|dur||}t | d }t|jd }t|}t|  }t	dt
dddD ]F\}}	||	|dd}
|
j|||d	 |durS|
j| }n|
j|}tj||d
}||dk }t|||
j ksyJ d| |
jq3|jd }t	dt
dddD ]B\}}	||	|dd}
|
|| |dur|
j| }n|
j|}t|}||dk }t|||
j ksJ d| |
jqdS )zPTest if leaves contain at least min_weight_fraction_leaf of the
    training setr_   Nr`   r   r   rK   rJ   )min_weight_fraction_leafr   rX   r   )weightsz,Failed with {0} min_weight_fraction_leaf={1})DATASETSastyperr   float32rngrandr   rv   r7   r   linspacer   r   r   tocsrr   r   r   ro   )r   r   sparse_containerr_   r`   r   total_weightr   r   fracr   r   node_weightsleaf_weightsr}   r}   r~   check_min_weight_fraction_leaf  sN   


r	  r   c                 C      t | d d S Nra   r	  r   r}   r}   r~   ,test_min_weight_fraction_leaf_on_dense_input     r  csc_containerc                 C      t | d|d d S Nrg   )r  r  r   r  r}   r}   r~   -test_min_weight_fraction_leaf_on_sparse_input  s   r  c                 C   s  t | d tj}|dur||}t | d }|jd }t|  }tdtdddD ]H\}}|||ddd	}	|	|| |durJ|	j	
| }
n|	j	
|}
t|
}||dk }t|t||	j dkssJ d
| |	j|	jq+tdtdddD ]K\}}|||ddd	}	|	|| |dur|	j	
| }
n|	j	
|}
t|
}||dk }t|t||	j ||	j ksJ d
| |	j|	jq~dS )zzTest the interaction between min_weight_fraction_leaf and
    min_samples_leaf when sample_weights is not provided in fit.r_   Nr`   r   r   rK   r<   r;   )r   r   r   rX   zBFailed with {0} min_weight_fraction_leaf={1}, min_samples_leaf={2}r   )r   r   rr   r   r   r7   r   r  r   r   r   r  r   r   maxr   ro   r   )r   r   r  r_   r`   r  r   r   r  r   r   r  r  r}   r}   r~   4check_min_weight_fraction_leaf_with_min_samples_leaf  s`   







r  c                 C   r
  r  r  r  r}   r}   r~   Btest_min_weight_fraction_leaf_with_min_samples_leaf_on_dense_input"  r  r  c                 C   r  r  r  r  r}   r}   r~   Ctest_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input'  s   
r  c                 C   s  t jd| d\}}tdt D ]\}}t| }||dd}||ddd}||ddd}||d	dd}	|d
f|df|df|	d	ffD ]\}
}|
j|ksSJ d|
j||
|| t|
j	j
D ]g}|
j	j| tkr|
j	j| }|
j	j| }|
j	j| }|
j	j| }|
j	j| }|| }|
j	j| }|
j	j| }|
j	j| }|| }|| }|| }|
j	j| |jd  }|||  }||ksJ d||q_qAqd S )Nd   rY   rX   r   r   r   rX   rQ   )r   min_impurity_decreaserX   g-C6?r   gHz>z)Failed, min_impurity_decrease = {0} > {1}z2Failed with {0} expected min_impurity_decrease={1})r   r   r   r7   r   r  ro   r   ranger   rn   rq   r   rw   weighted_n_node_samplesrp   r   )global_random_seedr_   r`   r   r   r   Zest1est2Zest3Zest4r   Zexpected_decreasenodeZ
imp_parentZ
wtd_n_nodeleftZ
wtd_n_leftZimp_leftZwtd_imp_leftrightZwtd_n_rightZ	imp_rightZwtd_imp_rightZwtd_avg_left_right_impZfractional_node_weightZactual_decreaser}   r}   r~   test_min_impurity_decrease1  sh   
r%  c               	      s   t  D ]q\} }d| v rtjtj}}ntjtj}}|dd  ||  ||}g d} fdd|D }t	 }t
|}t| jksLJ |||}	||	ks]J d| |D ]}
tt|j|
||
 d|
 d	|  d
 q_qdS )z8Test pickling preserves Tree properties and performance.
Classifierr   r   )r   rn   capacity	n_classesrq   rp   Zn_leavesrs   rt   rw   ru   r  rx   c                    s   i | ]	}|t  j|qS r}   )r   r   ).0	attributer   r}   r~   
<dictcomp>      ztest_pickle.<locals>.<dictcomp>z6Failed to generate same score  after pickling with {0}z"Failed to generate same attribute z after pickling with rl   N)r7   r   ra   r   r   rb   r   r   pickledumpsloadstype	__class__ro   r(   r   r   )r   r   r_   r`   r   
attributesZfitted_attributeZserialized_objectr!  Zscore2r*  r}   r+  r~   test_pickley  s8   





r4  c                  C   s  ddgddgddgddgddgddgddgddgddgddgddgddgg} ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgg}ddgddgddgddgg}t  D ]\\}}|dd}|| ||}t|| |jdksJ ||}t|dksJ |d jdksJ |d jd	ksJ ||}	t|	dksJ |	d jdksJ |	d jd	ksJ qlt	 D ]\}}
|
dd}|| ||}t
|| |jdksJ qd S )
NrB   r?   r9   rM   r   r<   r   r8   rM   )r8   r8   )r   r   r   r   r(   r   r   r   r   r   r&   )r_   r`   r   Zy_truer   r   r   Zy_hatZprobaZ	log_probar   r   r}   r}   r~   test_multioutput  s^   





r6  c                  C   s   t  D ]\\} }|dd}|tt |jdksJ t|jddg t	tt
td fj}|dd}|t| t|jdksCJ t|jdksLJ t|jddg t|jddgddgg qd S )Nr   r   rM   r?   r9   rB   )r   r   r   r_   r`   
n_classes_r(   Zclasses_rr   r   r   r   r   )r   r   r   _yr}   r}   r~   test_classes_shape  s   

r9  c                  C   sf   t jd d } t jd d }td|}t D ]\}}|dd}|j| ||d t|| | qd S )N}   balancedr   r   r   )	ra   r   r   r%   r   r   r   r&   r   )Zunbalanced_XZunbalanced_yr   r   r   r   r}   r}   r~   test_unbalanced_iris  s   

r<  c                  C   s  t t tjtjgD ]\\} }}|dd}tjtj|d}tj	}t
||||| tjtjd|d}tj	}t
||||| tjtjd|d}tj	}t
||||| tjtj|d}tj	}t
||||| tD ]}|tj|d}tj	}t
||||| qvtD ]}|tj|d}tj	}t
||||| qtjtjd d d |d}tj	d d d }t
||||| qd S )Nr   r   r   C)orderr   Fr<   )r   r7   r   rr   float64r   r   ra   r   r   r(   r   r   Zascontiguousarrayr/   r.   )r   r   r   r   r_   r`   csr_containerr  r}   r}   r~   test_memory_layout  s8   
rB  c                  C   s  t dd d t jf } t d}d|d d< t d}d||dk< tdd}|j| ||d t|| t d t dd d t jf } t d}d|dd< d	|dd< d| dddf< t d}d
||d	k< tddd}|j| ||d |j	j
d dksJ d||d	k< tddd}|j| ||d |j	j
d dksJ tj} tj}td| jd d}tdd}|| | ||  t j|| jd d}tdd}|j| ||d |j	jtjjk}t|j	j
| |j	j
|  d S )Nr  rT   2   r   r   r      r9   rM   gRQ?r   g     b@rK   g     H@)Z	minlength)rr   r   r   r   r   r   r(   r   rk   r   rt   ra   r   r   r   randintr   r   rq   r	   r   r   r'   )r_   r`   r   r   
duplicatesr   r|   r}   r}   r~   test_sample_weight5  sD   






rG  c                  C   s   t dd d t jf } t d}d|d d< tdd}t jdd}tt	 |j
| ||d W d    n1 s;w   Y  t d}td}tjt|d	 |j
| ||d W d    d S 1 sew   Y  d S )
Nr  rT   rC  r   r   r9   r   zgInput should have at least 1 dimension i.e. satisfy `len(x.shape) > 0`, got scalar `array(0.)` instead.r   )rr   r   r   r   r   randomr  r   r   r   r   r   reescape	TypeError)r_   r`   r   r   Zexpected_errr}   r}   r~   test_sample_weight_invalidi  s   


"rL  c           	      C   s  t |  }|dd}|tjtj |ddd}|tjtj t|j|j ttjtjtjfj	}|ddddddddddddgdd}|tj| t|j|j |ddd}|tj| t|j|j t
tjj}|tjdk  d	9  < dd
dd}|dd}|tjtj| ||dd}|tjtj t|j|j |dd}|tjtj|d  ||dd}|tjtj| t|j|j d S )Nr   r   r;  class_weightrX   g       @rO   r   r9   r  g      Y@rM   )r   r   ra   r   r   r&   r   rr   r   r   r   r   )	r   r   clf1r   Z
iris_multiZclf3Zclf4r   rN  r}   r}   r~   test_class_weights  s@   





rP  c                 C   sz   t |  }ttttd fj}|dddgdd}d}tjt|d |	t
| W d    d S 1 s6w   Y  d S )	NrM   rK   rO   r?   r9   r   rM  zBnumber of elements in class_weight should match number of outputs.r   )r   rr   r   r`   r   r   r   r   r   r   r_   )r   r   r8  r   rm   r}   r}   r~   test_class_weight_errors  s   "rR  c                  C   sX   t jddd\} }d}t D ]\}}|d |d d| |}| |d ks)J qd S Nr  r9   r  r8   )r   r   )r   make_hastie_10_2r7   r   r   Zget_n_leavesr_   r`   kr   r   r   r}   r}   r~   test_max_leaf_nodes  s   rW  c                  C   sP   t jddd\} }d}t D ]\}}|d|d| |}| dks%J qd S rS  )r   rT  r7   r   r   Z	get_depthrU  r}   r}   r~   test_max_leaf_nodes_max_depth  s   rX  c                  C   sZ   dD ](} t t dgdggddgj| }d|jd   kr%dk s*J d J dqd S )N)r(  rx   rq   rp   rt   rw   rs   ru   r   r9   rC   r<   z Array points to arbitrary memory)r   r   r   r   flat)attrrx   r}   r}   r~   test_arrays_persist  s   "
,r[  c                  C   s\   t d} td}| ddd}t D ]\}}|dd}||| |jjdks+J qd S )Nr   )rD   r[   rM   )rD   r   )	r0   rr   rk   rE  r7   r   r   r   r   )rX   r_   r`   r   r   r   r}   r}   r~   test_only_constant_features  s   

r\  c                  C   s~   t t g dgt df} g d}t D ]$\}}d|vr<|ddd}|| | |jjdks4J |jj	d	ks<J qd S )
N)r   r   r   r   r   r9   rM   r8   r;   rJ      )r8   rI   )r   r   r   r9   r9   rM   rM   rM   r<   r<   r<   Z	ExtraTreer   r9   r   rM   r;   )
rr   Z	transposer   rk   r7   r   r   r   r   rn   r_   r`   r   r   r   r}   r}   r~   ,test_behaviour_constant_feature_after_splits  s   r_  c                  C   s   t t dgdgdgdggt dg} t g d}t D ]$\}}|ddd}|| | |jjdks8J t	|
| t dd	 q t D ]$\}}|ddd}|| | |jjdksaJ t	|| t d
d	 qId S )NrO   rT   )r8   r   )rT   rO   rT   rO   r   r9   r   r5  rK   )r8   )rr   Zhstackr   rk   r   r   r   r   r   r(   r   r   r   r   r^  r}   r}   r~   (test_with_only_one_non_constant_features  s   *r`  c                  C   sf   t ddt jdd} t }tjtdd |	| g d W d    d S 1 s,w   Y  d S )Ng\)c=Hr8   r?   r9   r   r   )r   r9   r   r9   )
rr   repeatr   r@  reshaper   r   r   r   r   )r_   r   r}   r}   r~   test_big_input  s
   "rc  c                  C   sB   ddl m}  tt |   W d    d S 1 sw   Y  d S )Nr   _realloc_test)Zsklearn.tree._utilsre  r   r   MemoryErrorrd  r}   r}   r~   test_realloc  s   "rg  c                  C   s   dt d } tjdd}tjddd}d| d  }td|d}tt	 |
|| W d    n1 s8w   Y  d| d  d }td|d}tt |
|| W d    d S 1 sbw   Y  d S )	NrL   PrD   rM   r   r9   best)splitterr   )structcalcsizerr   rH  randnrE  r   r   r   	Exceptionr   rf  )Zn_bitsr_   r`   Zhuger   r}   r}   r~   test_huge_allocations  s   "ro  c                 C   s(  t |  }t| d }t| d }|dv r'|jd d }|d | }|d | }tt t D ]d}||}|d|d||}	|d|d||}
t|	j|
jd	|  |	
|}| tv re|	|}|	|}tt t D ]%}||tjd}t|

|| | tv rt|
|| t|
|| qkq-d S )	Nr_   r`   )rc   rb   r   r;   rX   r   5{0} with dense and sparse format gave different treesr   )r7   r   r   r-   r.   r/   r   r   r   ro   r   r   r   r   rr   r   r'   )r	   datasetr   r   r_   r`   rY   r  X_sparsery   rz   y_predZy_probaZy_log_probaZsparse_container_testX_sparse_testr}   r}   r~   check_sparse_input/  s>   



rv  	tree_typerr  )re   rd   rc   rg   rh   ri   rj   rk   c                 C   s    |dkrdnd }t | || d S )Nrc   r<   rv  )rw  rr  r   r}   r}   r~   test_sparse_inputX  s   ry  rb   rf   c                 C   s   t | |d d S )NrM   rx  )rw  rr  r}   r}   r~   test_sparse_input_reg_treesk  s   rz  )rh   ri   rj   rk   c           	      C   s  t |  }t| d }||}t| d }|dddd||}|dddd||}t|j|jd|  t|||| |dddd	||}|dddd	||}t|j|jd|  t|||| |d|jd d d
||}|d|jd d d
||}t|j|jd|  t|||| |ddd||}|ddd||}t|j|jd|  t|||| d S )Nr_   r`   r   r9   rM   )rX   r   r   rq  rD   )rX   r   r   )rX   r   r<   r   )	r7   r   r   r   r   ro   r'   r   r   )	rw  rr  r  r   r_   rs  r`   ry   rz   r}   r}   r~   test_sparse_parameterss  sP   r{  ztree_type, criterionc                 C      g | ]}|t v r|qS r}   )r   r)  r	   r}   r}   r~   
<listcomp>      r~  c                 C   r|  r}   )r   r}  r}   r}   r~   r~    r  c           
      C   s   t |  }t| d }||}t| d }|dd|d||}|dd|d||}	t|j|	jd|  t|	||| d S )Nr_   r`   r   r<   rX   r   r   rq  )r7   r   r   r   r   ro   r'   r   )
rw  rr  r  r   r   r_   rs  r`   ry   rz   r}   r}   r~   test_sparse_criteria  s   
r  zcsc_container,csr_containerc                 C   s  t |  }d}d}|}t|}td}g }	g }
d}|g}t|D ]/}||d}||d | }|	| |jdd|fdd }|
| ||7 }|| q t|		tj
}	tj|tj
d}tjt|
tjd}
||
|	|f||fd}| }||
|	|f||fd}| }|jdd|fd}| }|jd	k dksJ |jd	k dksJ |d|d
||}|d|d
||}t|j|jdt ||f}t||D ]s\}}t|j||j| t|||| t|||j| t|j| |j|  t|| ||  t|| |j|  t|||| ttv rHt|||| qd S )Nr<   rD   r   rK   r\   r9   r   r   rT   rp  rq  )r7   rr   r   r0   r  binomialpermutationappendconcatenater   int32r   r   toarrayrE  copyr   rv   r   r   r   ro   r	   r   r'   r   decision_pathr   r   r   )rw  r  rA  r   r   rZ   rY   samplesrX   r   r   offsetindptriZn_nonzero_iZ	indices_iZdata_irs  r_   ru  X_testr`   ry   rz   ZXsZX1r   r}   r}   r~   test_explicit_sparse_zeros  sr   



r  c                 C   s   t |  }tjd d df  }tjd d df d}tj}tt |dd	|| W d    n1 s7w   Y  |dd}|	|| tt |
|g W d    d S 1 s^w   Y  d S )Nr   rQ  r   )r7   ra   r   r   rb  r   r   r   r   r   r   )r   r   r_   ZX_2dr`   r   r}   r}   r~   check_raise_error_on_1d_input  s   
"r  c                 C   s4   t   t|  W d    d S 1 sw   Y  d S N)r*   r  r  r}   r}   r~   test_1d_input"  s   
"r  r  c                 C   s   t |  }tdgdgdgdgdgg}g d}g d}|d ur#||}|dd}|j|||d |jjdks8J |ddd}|j|||d |jjdksNJ d S )	Nr   r9   )r   r   r   r   r9   )r>   r>   r>   r>   r>   r   r   g?)rX   r   )r7   rr   r   r   r   r   )r   r  r   r_   r`   r   r   r}   r}   r~    test_min_weight_leaf_split_level(  s   
r  c                 C   sD   t jtjjdd}t|   }|t t t|	t |j
	| d S NFr  X_smallr   r	   r   r   r7   r   y_smallr(   r   r   )r   	X_small32r   r}   r}   r~   test_public_apply_all_trees<  s   
r  rA  c                 C   sH   |t jtjjdd}t|   }|t t t|	t |j
	| d S r  r  )r   rA  r  r   r}   r}   r~   test_public_apply_sparse_treesE  s   
r  c                  C   sP   t j} t j}tddd| |}|| d d  }t|g dg dg d S )Nr   r9   rp  rM   )r9   r9   r   r9   r   r9   )ra   r   r   r   r   r  r  r(   )r_   r`   r   node_indicatorr}   r}   r~   test_decision_path_hardcodedO  s
   r  c                    s   t j}t j}|jd }t|  }|ddd}||| ||}|   j||jj	fks/J |
|} fddt|D }t|tj|d |jjtk}	tt |	tj|d  jdd }
|jj|
kskJ d S )	Nr   rM   rp  c                    s   g | ]
\}} ||f qS r}   r}   )r)  r  jr  r}   r~   r~  g  s    z&test_decision_path.<locals>.<listcomp>r  r9   Zaxis)ra   r   r   r   r7   r   r  r  r   rn   r   	enumerater'   rr   r   rq   r   r   rv   r  r   )r   r_   r`   rY   r   r   Znode_indicator_csrleavesZleave_indicatorZ
all_leavesr   r}   r  r~   test_decision_pathW  s$   


r  c                 C   sX   t |t}}t|  }tt |dd|| W d    d S 1 s%w   Y  d S Nr   r   )X_multilabely_multilabelr7   r   r   rK  r   )r   rA  r_   r`   r   r}   r}   r~   test_no_sparse_y_supportu  s
   "r  c                  C   s  t dddd} | jdgdgdgdgdggg dg d	d
 t| jjg d t| jjjg d | jdgdgdgdgdggg dt	dd
 t| jjg d t| jjjg d | jdgdgdgdgdggg dd t| jjg d t| jjjg d dS )aQ	  Check MAE criterion produces correct results on small toy dataset:

    ------------------
    | X | y | weight |
    ------------------
    | 3 | 3 |  0.1   |
    | 5 | 3 |  0.3   |
    | 8 | 4 |  1.0   |
    | 3 | 6 |  0.6   |
    | 5 | 7 |  0.3   |
    ------------------
    |sum wt:|  2.3   |
    ------------------

    Because we are dealing with sample weights, we cannot find the median by
    simply choosing/averaging the centre value(s), instead we consider the
    median where 50% of the cumulative weight is found (in a y sorted data set)
    . Therefore with regards to this test data, the cumulative weight is >= 50%
    when y = 4.  Therefore:
    Median = 4

    For all the samples, we can get the total error by summing:
    Absolute(Median - y) * weight

    I.e., total error = (Absolute(4 - 3) * 0.1)
                      + (Absolute(4 - 3) * 0.3)
                      + (Absolute(4 - 4) * 1.0)
                      + (Absolute(4 - 6) * 0.6)
                      + (Absolute(4 - 7) * 0.3)
                      = 2.5

    Impurity = Total error / total weight
             = 2.5 / 2.3
             = 1.08695652173913
             ------------------

    From this root node, the next best split is between X values of 3 and 5.
    Thus, we have left and right child nodes:

    LEFT                    RIGHT
    ------------------      ------------------
    | X | y | weight |      | X | y | weight |
    ------------------      ------------------
    | 3 | 3 |  0.1   |      | 5 | 3 |  0.3   |
    | 3 | 6 |  0.6   |      | 8 | 4 |  1.0   |
    ------------------      | 5 | 7 |  0.3   |
    |sum wt:|  0.7   |      ------------------
    ------------------      |sum wt:|  1.6   |
                            ------------------

    Impurity is found in the same way:
    Left node Median = 6
    Total error = (Absolute(6 - 3) * 0.1)
                + (Absolute(6 - 6) * 0.6)
                = 0.3

    Left Impurity = Total error / total weight
            = 0.3 / 0.7
            = 0.428571428571429
            -------------------

    Likewise for Right node:
    Right node Median = 4
    Total error = (Absolute(4 - 3) * 0.3)
                + (Absolute(4 - 4) * 1.0)
                + (Absolute(4 - 7) * 0.3)
                = 1.2

    Right Impurity = Total error / total weight
            = 1.2 / 1.6
            = 0.75
            ------
    r   r4   rM   )rX   r   r   r<   r;   rL   )rJ   r]  r<   r8   r<   )333333?333333?r   rO   r  )r_   r`   r   )g,d?gܶm۶m?g?)      @g      @r  )ffffff?rN   gUUUUUU?)r8   rU   r  r^   N)
r   r   r   r   rw   r(   rx   rY  rr   r   )Zdt_maer}   r}   r~   test_mae  s    J.&r  c                  C   s   d} t jdt jd}d}dd }tjtj|fD ][}t D ]'\}}|| |}|| }|\}	\}
}}||	ks9J | |
ks?J t|| qt	 D ](\}}|| |}|| }|\}	\}
}}||	kseJ | |
kskJ ||ksqJ qIqd S )Nr<   r   r  c                 S   s   t t | S r  )r.  r0  r/  )objr}   r}   r~   _pickle_copy  s   z)test_criterion_copy.<locals>._pickle_copy)
rr   r   intpr  deepcopyr   r   
__reduce__r(   r   )	n_outputsr(  rY   r  Z	copy_func_typenamecriteriaresultZ	typename_
n_outputs_r7  Z
n_samples_r}   r}   r~   test_criterion_copy  s*   

r  c           	      C   s   t jdddd }t |d}|d d d df }| d ur&| |}|d d df }tdd||}||}t	t 
|jjtkd }||}t 
t |jj d }t|dkscJ t|dkskJ d S )Nr   r  rI   g*Gr   r?   r   )rr   rH  RandomStaterm  Z
nan_to_numr   r   r   r   setwherer   rq   r   
differenceisfinitert   r   )	r  r   r_   r`   r	   Zterminal_regionsZ	left_leafZ
empty_leafZinfinite_thresholdr}   r}   r~   "test_empty_leaf_infinite_threshold  s   

r  tree_clsc                 C      t |  } | d | d }}|ddd}|||}|j}|j}tt|dks+J tt|dks7J t|||| d S Nr_   r`   r[   r   r  r   Zcost_complexity_pruning_pathZ
ccp_alphas
impuritiesrr   r   diffassert_pruning_creates_subtreerr  r  r_   r`   r   infopruning_pathr  r}   r}   r~   'test_prune_tree_classifier_are_subtrees  s   r  c                 C   r  r  r  r  r}   r}   r~   'test_prune_tree_regression_are_subtrees$  s   r  c                  C   sX   t dd} | dgdggddg t ddd}|dgdggddg t| j|j d S )Nr   r   r9   rD   )rX   	ccp_alpha)r   r   assert_is_subtreer   )rO  r   r}   r}   r~   test_prune_single_node_tree5  s
   
r  c           	      C   sR   g }|D ]}| d|dd ||}|| qt|D ]\}}t|j|j qd S )Nr[   r   )r   r  rX   )r   r  r   r  r   )	Zestimator_clsr_   r`   r  Z
estimatorsr  r   Zprev_estZnext_estr}   r}   r~   r  A  s   r  c           	      C   s  | j |j ksJ | j|jksJ | j}| j}|j}|j}dg}|r| \}}t| j| |j|  t| j| |j|  t| j	| |j	|  t| j
| |j
|  || || krdtt|j|  n!t| j| |j|  ||| || f ||| || f |s!d S d S )N)r   r   )rn   r   rq   rp   popr'   rx   r&   rw   ru   r  r   rt   r  )	r	   ZsubtreeZtree_c_leftZtree_c_rightZsubtree_c_leftZsubtree_c_rightstackZtree_node_idxZsubtree_node_idxr}   r}   r~   r  P  s@   r  rj  ri  rH  c                 C   s   t d }|d jtjjdd}|d u rt|}n!||d }tj|jtjjd|_t|j|j	|j
f\|_|_	|_
ttjttjjd}t|  |d}||| t|||| t|| ||  d S )Nre   r_   Fr  r   )rj  )r   r   r	   r   r   r)   rr   r   r   r   r  r  r7   r   r(   r   r  Ztodense)r   rj  r  rr  r  Z
X_readonlyZ
y_readonlyr   r}   r}   r~   "test_apply_path_readonly_all_treesx  s(   
r  )r3   r5   r6   c                 C   sL   t jt j}}|| d}||| t||tt|ks$J d S )Nr   )	rb   r   r   r   rr   rv   r   r   r   )r   r$   r_   r`   r   r}   r}   r~   test_balance_property  s   
(r  seedc              	   C   s  ddgddgddgddgddgddgddgddgg}g d}t d| d}||| t||dks6J t d| d}||| t||dksNJ d	}tj|d d d
d||d d | d\}}d|d|k |dk @ < t|}t d| d}||| t||dksJ d S )Nr   r9   rM   r<   )r   r   r   r   r9   rM   r<   r8   r3   r   r6   rD   r  r   )Zeffective_rankZtail_strengthrY   rZ   r   rX   r?   )	r   r   rr   Zaminr   r   r   make_regressionr   )r  r_   r`   r   rZ   r}   r}   r~   test_poisson_zero_nodes  s,   4


	
r  c                  C   sB  t jd} d\}}}tj|| || d}| jdd|dt j|dd }| jt || d	}t	|||| d
\}}}	}
t
dd| d}t
dd| d}|||	 |||	 tdd||	}||	df||
dffD ]6\}}}t|||}t|t ||dd }t|||}|dkr|d| k sJ |d| k sJ qhd S )Nr   )  r  rD   rY   rZ   rX   rB   rM   )lowhighr]   r   r  )Zlam)Z	test_sizerX   r6   rD   )r   r   rX   r3   mean)ZstrategytraintestgV瞯<rK   g      ?)rr   rH  r  r   Zmake_low_rank_matrixuniformr  r6   r   r   r   r   r
   r   r   Zclip)r   Zn_trainZn_testrZ   r_   Zcoefr`   X_trainr  r   r   Ztree_poiZtree_msedummyvalZ
metric_poiZ
metric_mseZmetric_dummyr}   r}   r~   test_poisson_vs_mse  s6   

r  r(  c                 C   sz   d\}}t j||||ddd\}}| ddd||}| ddd||}t|j|j| d	 t|||| d
S )z3Test that criterion=entropy gives same as log_loss.)rC  r;   r   r   )r(  rY   rZ   r   r   rX   r2   +   r   entropyz> with criterion 'entropy' and 'log_loss' gave different trees.N)r   r   r   r   r   r   r   )r$   r(  rY   rZ   r_   r`   Ztree_log_lossZtree_entropyr}   r}   r~   'test_criterion_entropy_same_as_log_loss  s"   
r  c                     sv   t jdd\} }tddd  | |  | |}dd  fdd}t| }|| |}t||s9J d S )	Nr   r   r<   rp  c                 S   s   |   | j  S r  )byteswapviewr   newbyteorderr  )arrr}   r}   r~   reduce_ndarray  s   z8test_different_endianness_pickle.<locals>.reduce_ndarrayc                     sB   t  } t| }tj |_|jtj< |	  | 
d | S Nr   )ioBytesIOr.  Picklercopyregdispatch_tabler  rr   ndarraydumpseekfpr   r  r}   r~    get_pickle_non_native_endianness  s   


zJtest_different_endianness_pickle.<locals>.get_pickle_non_native_endianness)	r   r   r   r   r   r.  loadrr   isclose)r_   r`   r   r  new_clf	new_scorer}   r  r~    test_different_endianness_pickle  s   
r  c                     s~   t jdd\} }tddd| | | |}G dd dt  fdd}t| }|| |}t	||s=J d S )	Nr   r   r<   rp  c                       s   e Zd Z fddZ  ZS )zPtest_different_endianness_joblib_pickle.<locals>.NonNativeEndiannessNumpyPicklerc                    s0   t |tjr| |j }t | d S r  )	
isinstancerr   r  r  r  r   r  supersave)selfr  r2  r}   r~   r   (  s   zUtest_different_endianness_joblib_pickle.<locals>.NonNativeEndiannessNumpyPickler.save)__name__
__module____qualname__r   __classcell__r}   r}   r  r~   NonNativeEndiannessNumpyPickler'  s    r  c                     s(   t  }  | }| | d | S r  )r  r  r  r  r  r  r   r}   r~   'get_joblib_pickle_non_native_endianness-  s
   

zXtest_different_endianness_joblib_pickle.<locals>.get_joblib_pickle_non_native_endianness)
r   r   r   r   r   r   joblibr  rr   r  )r_   r`   r   r	  r  r  r}   r  r~   'test_different_endianness_joblib_pickle   s   r  c                 C   sn   t rtjntj}g d}dd | jj D }|D ]}|||< qtt| t|	 d}| j
|ddS )N)
left_childright_childrs   ru   c                 S      i | ]	\}\}}||qS r}   r}   r)  r   r   r  r}   r}   r~   r,  @  r-  z6get_different_bitness_node_ndarray.<locals>.<dictcomp>namesformats	same_kindZcasting)r,   rr   int64r  r   fieldsr   listr   valuesr   )node_ndarrayZnew_dtype_for_indexing_fieldsZindexing_field_namesnew_dtype_dictr   	new_dtyper}   r}   r~   "get_different_bitness_node_ndarray:  s   

r  c                 C   sj   dd | j j D }dd | j j D }dd |D }t t| t| |d}| j|ddS )	Nc                 S   r  r}   r}   r  r}   r}   r~   r,  M  r-  z8get_different_alignment_node_ndarray.<locals>.<dictcomp>c                 S   s   g | ]\}}|qS r}   r}   )r)  r   r  r}   r}   r~   r~  P      z8get_different_alignment_node_ndarray.<locals>.<listcomp>c                 S   s   g | ]}d | qS )rL   r}   )r)  r  r}   r}   r~   r~  Q  r  )r  r  offsetsr  r  )r   r  r   r  rr   r  r   r   )r  r  r  Zshifted_offsetsr  r}   r}   r~   $get_different_alignment_node_ndarrayL  s   


r  c           	      C   sZ   t rtjntj}|  \}\}}}}|j|dd}| }t|d |d< ||||f|fS )Nr  r  nodes)r,   rr   r  r  r  r   r  r  )	r	   r  r  rZ   r(  r  stateZnew_n_classes	new_stater}   r}   r~   "reduce_tree_with_different_bitness]  s   r#  c                     n   t jdd\} }tddd  | |  | |} fdd}t| }|| |}|t|ks5J d S )Nr   r   r<   rp  c                     s@   t  } t| }tj |_t|jt< |	  | 
d | S r  )r  r  r.  r  r  r  r  r#  
CythonTreer  r  r  r   r}   r~   "pickle_dump_with_different_bitnesso  s   



zItest_different_bitness_pickle.<locals>.pickle_dump_with_different_bitness)	r   r   r   r   r   r.  r  r   r   )r_   r`   r   r&  r  r  r}   r   r~   test_different_bitness_pickleh  s   
r'  c                     r$  )Nr   r   r<   rp  c                     s>   t  } t| }tj |_t|jt< |  | 	d | S r  )
r  r  r   r  r  r  r#  r%  r  r  r  r   r}   r~   "joblib_dump_with_different_bitness  s   


zPtest_different_bitness_joblib_pickle.<locals>.joblib_dump_with_different_bitness)	r   r   r   r   r   r
  r  r   r   )r_   r`   r   r(  r  r  r}   r   r~   $test_different_bitness_joblib_pickle~  s   
r)  c                  C   s  t rttjnttj} ttjttjg}|dd |D 7 }tjddg| d}|D ]
}t|||  q.tj	t
dd tjddgg| d}t||  W d    n1 sZw   Y  tj	t
dd |tj}t||  W d    d S 1 s}w   Y  d S )	Nc                 S   s   g | ]}|  qS r}   )r  )r)  dtr}   r}   r~   r~    r  z(test_check_n_classes.<locals>.<listcomp>r   r9   r   zWrong dimensions.+n_classesr   zn_classes.+incompatible dtype)r,   rr   r   r  r  r   r!   r   r   r   r   r@  )expected_dtypeallowed_dtypesr(  r*  Zwrong_dim_n_classesZwrong_dtype_n_classesr}   r}   r~   test_check_n_classes  s   "r-  c               	   C   s0  t t j} d}t j|| d}| |  g}|D ]	}t|||d qtjtdd t|| dd W d    n1 s:w   Y  |d d d d d df t 	|fD ]!}tjtdd t|| |j
d W d    n1 smw   Y  qQtjtd	d t|t j| |d W d    d S 1 sw   Y  d S )
N)r;   r9   rM   r   )r+  expected_shapezWrong shape.+value arrayr   )r9   rM   r9   zvalue array.+C-contiguouszvalue array.+incompatible dtype)rr   r   r@  rk   r  r#   r   r   r   r   r   r   r   )r+  r.  Zvalue_ndarrayr,  r*  Zproblematic_arrr}   r}   r~   test_check_value_ndarray  s:   (
"r/  c                  C   s  t } tjd| d}|t|t|g}|dd |D 7 }|D ]}t|| d qtjtdd tjd| d}t|| d W d    n1 sEw   Y  tjtd	d |d d d
 }t|| d W d    n1 siw   Y  dd |j	j
 D }| }tj|d< t	t| t| d}||}tjtdd t|| d W d    n1 sw   Y  | }tj|d< t	t| t| d}||}tjtdd t|| d W d    d S 1 sw   Y  d S )N)r;   r   c                 S   s   g | ]
}| |j qS r}   )r   r   r  )r)  r  r}   r}   r~   r~    s    z+test_check_node_ndarray.<locals>.<listcomp>)r+  zWrong dimensions.+node arrayr   )r;   rM   znode array.+C-contiguousrM   c                 S   r  r}   r}   r  r}   r}   r~   r,    s    z+test_check_node_ndarray.<locals>.<dictcomp>rt   r  znode array.+incompatible dtyper  )r   rr   rk   r  r  r"   r   r   r   r   r  r   r  r  r  r   r  r   r@  )r+  r  Zvalid_node_ndarraysr  Zproblematic_node_ndarrayZ
dtype_dictr  r  r}   r}   r~   test_check_node_ndarray  sL   



"r0  Splitterc           	      C   s   t jd}d}dt jddgt jd}}td ||}| ||dd|d	d
}t|}t|}|j	|ks7J t
|| s>J d	S )z&Check that splitters are serializable.r   rD   rM   r<   r   r1   r;   rK   N)Zmonotonic_cst)rr   rH  r  r   r  r   r.  r/  r0  r   r  )	r1  r   r   r  r(  r   rj  Zsplitter_serializeZsplitter_backr}   r}   r~   test_splitter_serializable 	  s   

r2  c                 C   sR   t | d}tdd}|tt t|| tj|dd}t	|j
|j
d dS )zhCheck that Trees can be deserialized with read only buffers.

    Non-regression test for gh-25584.
    z
clf.joblibr   r   r)Z	mmap_modez?The trees of the original and loaded classifiers are not equal.N)strjoinr   r   r  r  r
  r  r  r   r   )ZtmpdirZpickle_pathr   Z
loaded_clfr}   r}   r~   /test_tree_deserialization_from_read_only_buffer	  s   
r6  c                 C   s   t ddgddgg}t ddg}| dd|| | dd}d}tjt|d ||| W d   dS 1 s;w   Y  dS )zhCheck that an error is raised when min_sample_split=1.

    non-regression test for issue gh-25481.
    r   r9   rO   )r   zb'min_samples_split' .* must be an int in the range \[2, inf\) or a float in the range \(0.0, 1.0\]r   N)rr   r   r   r   r   r   )r$   r_   r`   r	   msgr}   r}   r~   test_min_sample_split_1_error%	  s   
"r8  c                 C   s   t g dgj}t g d}tdd| d}||| |t jgg}t|t |dd g |dd }|dd }tdd| d}||| |t jgg}t|t |d	d g dS )
z=Check missing values goes to correct node during predictions.	r   r9   rM   r<   rL   r   rI      r   	r   r>   r  r>   r  r  rN   g?g@r   r9   r  r=   Nr?   r:   )	rr   r   r   r   r   r   nanr   r  )r   r_   r`   dtcrt  ZX_equalZy_equalr}   r}   r~   ;test_missing_values_best_splitter_on_equal_nodes_no_missing;	  s   r>  c                 C   s   t g dgj}t g d}t|d| d}||| |jjd }|jjd }|jj| }|jj| }||k}	|jj	| d }
|jj	| d }|
t jgg}|	rZt|
| dS t|| dS )zCheck missing values go to the correct node during predictions for ExtraTree.

    Since ETC use random splits, we use different seeds to verify that the
    left/right node is chosen correctly when the splits occur.
    r9  r;  r9   r  r   N)rr   r   r   r   r   r   rq   rp   r  rx   r   r<  r   )r   r  r_   r`   Zetrr  r  Zleft_samplesZright_samplesZ	went_leftZy_pred_leftZy_pred_rightrt  r}   r}   r~   =test_missing_values_random_splitter_on_equal_nodes_no_missingU	  s   r?  r  r1   c                 C   s   d}t t jgd g d gj}t |gd dgd  dgd  }tdd| d}||| t t jdd	ggj}||}t||ddg d
S )zITest when missing values are uniquely present in a class among 3 classes.r   r8   )r   r9   rM   r<   rL   r   rI   r:  r9   rM   r   r  r<   r:  Nrr   r   r<  r   r   r   r   r(   )r   Zmissing_values_classr_   r`   r=  r  Z
y_nan_predr}   r}   r~   /test_missing_values_best_splitter_three_classesx	  s   $
rA  c                 C   s   t t jgd g d gj}t dgd dgd  }tdd| d}||| t t jd	t jggj}||}t|g d
 dS )zMissing values spanning only one class at fit-time must make missing
    values at predict-time be classified has belonging to this class.r8   r   r9   rM   r<   r8   r;   r   r9   rJ   r   rM   r  r;   )r   r9   r   Nr@  r   r_   r`   r=  r  rt  r}   r}   r~   )test_missing_values_best_splitter_to_left	  s   
rD  c                 C   s   t t jgd g d gj}t dgd dgd  dgd  }tdd| d}||| t t jdd	ggj}||}t|g d
 dS )zMissing values and non-missing values sharing one class at fit-time
    must make missing values at predict-time be classified has belonging
    to this class.r8   rB  r9   r   rM   r   r  rP   g333333@r  Nr@  rC  r}   r}   r~   *test_missing_values_best_splitter_to_right	  s   $
rE  c                 C   s   t ddddt jddddt jg
gj}t d	gd dgd  }td
d| d}||| t t jddggj}||}t|g d dS )zNCheck behavior of missing value when there is one missing value in each class.r9   rM   r<   r;   rD   r[   rW   r   r   r   r  gffffff@gA@r  Nr@  rC  r}   r}   r~   >test_missing_values_best_splitter_missing_both_classes_has_nan	  s   &
rF  r	   r   c                 C   s   t ddddt jddddt jg
gj}t d	gd dgd  }| d
ur(| |}tjtdd ||| W d
   d
S 1 sAw   Y  d
S )z4Check unsupported configurations for missing values.r9   rM   r<   r;   rD   r[   rW   r   r   NzInput X contains NaNr   )rr   r   r<  r   r   r   r   r   )r  r	   r_   r`   r}   r}   r~   test_missing_value_errors	  s   &"rG  c                 C   sp   t j t j}}tj|ddddf< tj|ddddf< | ddd}||| ||}|d	k s6J dS )
z5Smoke test for poisson regression and missing values.Nr;   r   rJ   r?   r6   r   r   rT   )	rb   r   r  r   rr   r<  r   r   r   )r$   r_   r`   r   rt  r}   r}   r~   test_missing_values_poisson	  s   
rH  c                  O   s$   t j| i |\}}|dk}||fS )N   )r   make_friedman1)argskwargsr_   r`   r}   r}   r~   make_friedman1_classification	  s   rM  zmake_data, Tree, tolerancegQ?gQ?gQ?sample_weight_trainr   c                 C   s  d\}}| ||d|d\}}|  }	tj|}
tj|	|
jddg|jddgd< t|	||d	\}}}}|d
kr@t|jd }nd}d}|||d}|j	|||d |
||}tt |||d}|	|| |
||}|| |ksJ d|d| d| dS )zFCheck that trees can deal with missing values have decent performance.)r   rD   rO   )rY   rZ   noiserX   FTrV   r   r]   r  r   r   r   NrD   r   r   zscore_native_tree=z + z! should be strictly greater than )r  rr   rH  r  r<  choicer   r   r   r   r   r   r   )	make_datar$   rN  r   Z	tolerancerY   rZ   r_   r`   	X_missingr   ZX_missing_trainZX_missing_testr   r   r   r   Znative_treeZscore_native_treeZtree_with_imputerZscore_tree_with_imputerr}   r}   r~   !test_missing_values_is_resilience	  s<   
 rT  zTree, expected_scoreg333333?g(\?c                 C   s   t jd}d}|j|dfd}t t |d t |d g}|jddg|dd	gd
}| 	t
}||  ||< |j|d}	t j|	|< |	|dddf< | |d}
t|
||dd }||kskJ d| d| dS )z@Check the tree learns when only the missing value is predictive.r   r  r[   r\   rM   FTgffffff?rQ   rP  Nr;   r   )ZcvzExpected CV score: z	 but got )rr   rH  r  Zstandard_normalr  rk   r   rQ  r  r   boolr<  r   r  )r$   Zexpected_scorer   r   rY   r_   r`   ZX_random_maskZy_maskZX_predictiver	   Ztree_cv_scorer}   r}   r~    test_missing_value_is_predictive 
  s   "


rV  zmake_data, Treec           
      C   s   t jd}d\}}| |||d\}}t j||jddg|jddgd< t |jd }d	|d
d
d< |dd}|j|||d |dd}	|	|dd
dd
d
f |dd
d  t|		||	| d
S )z=Check sample weight is correctly handled with missing values.r   )r   rD   r  FTrV   r   rP  rT   NrM   r   r   r9   )
rr   rH  r  r<  rQ  r   r   r   r   r   )
rR  r$   r   rY   rZ   r_   r`   r   Ztree_with_swZtree_samples_removedr}   r}   r~   test_sample_weight_non_uniform>
  s   	 

(rW  c                  C   sP   t ddtjtj} t ddtjtj}t| }t|}||ks&J d S r  )r   r   ra   r   r   r.  r/  )Ztree1Ztree2Zpickle1Zpickle2r}   r}   r~   test_deterministic_pickle[
  s
   

rX  r_   r;   rJ   c                 C   s   | dd}td}| |dd||}t|| dd|}|jj}t|dks1J | t	|jjdd |jjdd  t
|jjdk|jjdk@ }t	|jj| d dS )	a'  Check that we properly handle missing values in regression trees using a toy
    dataset.

    The regression targeted by this test was that we were not reinitializing the
    criterion when it comes to the number of missing values. Therefore, the value
    of the critetion (i.e. MSE) was completely wrong.

    This test check that the MSE is null when there is a single sample in the leaf.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28254
    https://github.com/scikit-learn/scikit-learn/issues/28316
    r?   r9   rJ   r   r   NrM   rT   )rb  rr   r   r   r   r   rw   r   r   r   flatnonzerorq   ru   )r$   r_   r   r`   r	   Ztree_refrw   
leaves_idxr}   r}   r~   'test_regression_tree_missing_values_toyh
  s   
"r[  c                 C   s   t j| }d}t j|t jddd}t j|dd d d f< || t |}t| dd	||}|j
j}t|dksAJ |d S )	Nr  r   r?   r9   ir;   rp  r   )rr   rH  r  r   r@  rb  r<  r   r   r   r   rw   r   )r   r   rY   r_   r`   r	   rw   r}   r}   r~   -test_regression_extra_tree_missing_values_toy
  s   

r\  c                  C   s   t jdd\} }tjd}|  }|jtjdtjd| dddgf d d		t
}tj||< t||d
d\}}}}tjg dtjd}tdddd}	|	|| ||  t|	jjdks`J t|	jjdk|	jjdk@ }
t|	jj|
 d dS )a  Check that we properly handle missing values in classification trees using a toy
    dataset.

    The test is more involved because we use a case where we detected a regression
    in a random forest. We therefore define the seed and bootstrap indices to detect
    one of the non-frequent regression.

    Here, we check that the impurity is null or positive in the leaves.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28254
    T)Z
return_X_yr   )r9   r8   )r   r   NrM   rL   )nr     r   )prM   Q   '   a   [   &   .      e   r^  Y   R   r  r   E      r_     I   J   3   /   k      K   n   r[   r   h   9      r   rr  O   #   M   Z   rn  rd  r^  ^   rb     rL   ]   r}  rl  ry  r:  r^  rm  m   rs     rD   r|  rt  rj  \   4   r[   r~  rL   rL      rj  rx  r:  r:  r]  r  r   rW   re  N   r:  r  i   rC  r   rl  r:  f   r  r^  re  r9   ri  rI       rr  rz  j   r{  r   8   rx  rq  >   U   r_  r`  P   rk  ?   rJ   r  T   r<   r<   L   r  r   r<   r   iHnr   r   r?   r9   rT   )r   	load_irisrr   rH  r  r  r  r   r  r   rU  r<  r   r   r   r   r   r   rw   rY  rq   ru   r   )r_   r`   r   rS  maskr  r  r   r   r	   rZ  r}   r}   r~   +test_classification_tree_missing_values_toy
  s,   "


r  c                  C   sH  t ddd} | tjtj t| j}t| j	|| j
}tj| jjtjd}d|d< t|| j| | jjdks9J |jdks@J tt t| jj|j W d   n1 sXw   Y  t| jjd |jd  t| j	|| j
}tj| jjtjd}d|dd< t|| j| | jjdksJ |jdksJ |jt| jj|j dS )zHTest pruning a tree with the Python caller of the Cythonized prune tree.r   r9   rp  r   r<   N)r   r   ra   r   r   rr   
atleast_1dr7  r%  n_features_in_r  rk   r   rn   uint8r    r   r   AssertionErrorr(   rx   r	   r(  Zpruned_treeZleave_in_subtreer}   r}   r~   test_build_pruned_tree_py
  s(   r  c                  C   s   t ddd} | tjtj t| j}t| j	|| j
}tj| jjtjd}d|d< tjtdd t|| j| W d   dS 1 sDw   Y  dS )z8Test pruning a tree does not result in an infinite loop.r   r9   rp  r   z,Node has reached a leaf in the original treer   N)r   r   ra   r   r   rr   r  r7  r%  r  r  rk   r   rn   r  r   r   r   r    r  r}   r}   r~   $test_build_pruned_tree_infinite_loop
  s   "r  c                  C   sf   t jd} | jddddt j}t |gd }t jdt jd}t	||d g d	}t
|| d
S )zNon-regression test for gh-30554.

    Using log2 and log in sort correctly sorts feature_values, but the tie breaking is
    different which can results in placing samples in a different order.
    rr  rT   g      $@rD   )locscaler]   r;   rC  r   )2r   (   rW   r[   rD      r`     1   r   -   r   rx  r;      rI   re  )   r9         r:  rM   r   r  r|  r^  r  r<   !   rJ   $   rd  rk  rv  r8   rI  r  "   ,   rj  ro  r]  %   rq  rL   rc  0   r     N)rr   rH  Zdefault_rngnormalr   r   r  r   r  r   r(   )r   ZsomeZfeature_valuesr  Zexpected_samplesr}   r}   r~   test_sort_log2_build  s   r  r  )__doc__r  r  r  r.  rI  rk  	itertoolsr   r   r   r
  numpyrr   r   Zjoblib.numpy_pickler   Znumpy.testingr   Zsklearnr   r   r	   Zsklearn.dummyr
   Zsklearn.exceptionsr   Zsklearn.imputer   Zsklearn.metricsr   r   r   Zsklearn.model_selectionr   r   Zsklearn.pipeliner   Zsklearn.random_projectionr   Zsklearn.treer   r   r   r   Zsklearn.tree._classesr   r   r   r   Zsklearn.tree._partitionerr   Zsklearn.tree._treer   r   r   r    r!   r"   r#   r$   r%  Zsklearn.utilsr%   Zsklearn.utils._testingr&   r'   r(   r)   r*   r+   Zsklearn.utils.fixesr,   r-   r.   r/   Zsklearn.utils.validationr0   r   ZREG_CRITERIONSr   r   dictr7   __annotations__updateZSPARSE_TREESr   r  r  Zy_small_regr_   r`   r   r   r  ra   rH  r  r   r  r   r]   permr   Zload_diabetesrb   Zload_digitsrc   rX   Zmake_multilabel_classificationr  r  r  ZX_sparse_posrE  Zy_randomr  ZX_sparse_mixrk   r   r   r   r   markZparametrizer  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	  r  r  r  r  r  r%  r4  r6  r9  r<  rB  rG  rL  rP  rR  rW  rX  r[  r\  r_  r`  rc  rg  ro  rv  ry  sortedr  intersectionrz  r{  r  r  zipr  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  r'  r)  r-  r/  r0  r2  r6  r8  r>  r?  rA  rD  rE  rF  rG  rH  rM  rJ  rT  rV  r  r   rW  rX  r<  r[  r\  r  r  r  r  r}   r}   r}   r~   <module>   s   $	 

(

'



	
$B!;
=
H3<-4
/
		
)3
K


d
(
!*
!4!




 -!/