
    h                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZmZ d dlZd dlZd dlmZmZ d dlmZ d dlmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d d	l'm(Z(m)Z)m*Z* d d
l+m,Z,m-Z-m.Z. d dl/m0Z0 dZ1h dZ2h dZ3 e4 ejj                  de             jm                         dk(  Z7de2 de3 Z8dee4   dee4   fdZ9	 dAdee4   de:de:de;de4f
dZ<dee4   de4fdZ=dej,                  dee;e;f   fdZ>d edefd!Z?d edefd"Z@d#e4d$e4d%ee;e4f   fd&ZA	 dBd'ee;e;f   d(eej                     d)e;d*e;dej                  f
d+ZC	 dCd'ee;e;f   d(eej                     d)e;d*e;dej                  f
d,ZD	 dCd'ee;e;f   d-eej                     d*e;deej                  ej                  f   fd.ZEd/e	de	fd0ZFdDd1e4d2eGdee4ef   fd3ZHdEd1ee4e	f   d4e4dee4ef   fd5ZI G d6 d7      ZJdFd8e4d9e4d:e;d;e;fd<ZKd/e	defd=ZLde4d/e	d>ed?e4fd@ZMy)G    N)
ThreadPool)Path)
is_tarfile)AnyDictListTupleUnion)ImageImageOps)check_class_names)DATASETS_DIRLOGGERMACOSNUM_THREADSROOTSETTINGS_FILETQDMYAML	clean_urlcolorstremojisis_dir_writeable)
check_file
check_fontis_ascii)downloadsafe_download
unzip_file)segments2boxeszJSee https://docs.ultralytics.com/datasets for dataset formatting guidance.>   bmpdngjpgmpopfmpngtifheicjpegtiffwebp>   tsasfavigifm4vmkvmovmp4mpgwmvmpegwebm
PIN_MEMORYtruezSupported formats are:
images: z	
videos: 	img_pathsreturnc           	      "   t         j                   dt         j                   t         j                   dt         j                   }}| D cg c]9  }|j                  |j                  |d            j                  dd      d   dz   ; c}S c c}w )zaConvert image paths to label paths by replacing 'images' with 'labels' and extension with '.txt'.imageslabels   .r   z.txt)ossepjoinrsplit)r:   sasbxs       T/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/ultralytics/data/utils.pyimg2label_pathsrI   ,   so    xvbffX&266(&(ABIRSABGGAHHRO$++C3A6?SSSs   >Bfilesthreshold_msthreshold_mb	max_filesprefixc           	         | rt        |       dk(  rt        j                  | d       yt        j                  | t        |t        |                   } g }g }g }| D ]  }	 t        j                         }	t        j                  |      j                  }
|j                  t        j                         |	z
  dz         |j                  |
       t        j                         }	t        |d      5 }|j                         }ddd       t        j                         |	z
  }|dkD  r|j                  |
dz  |z          |st        j                  | d       yt        j                   |      }t        |      dkD  rt        j"                  |d	      nd}d
t        j                   |      dz  dd}d|dd|dd}|rHt        j                   |      }t        |      dkD  rt        j"                  |d	      nd}d|dd|dd}nd}||k  s|k  r t        j$                  | d| | | d       yt        j                  | d| | | d       y# 1 sw Y   TxY w# t        $ r Y w xY w)ar  
    Check dataset file access speed and provide performance feedback.

    This function tests the access speed of dataset files by measuring ping (stat call) time and read speed.
    It samples up to 5 files from the provided list and warns if access times exceed the threshold.

    Args:
        files (List[str]): List of file paths to check for access speed.
        threshold_ms (float, optional): Threshold in milliseconds for ping time warnings.
        threshold_mb (float, optional): Threshold in megabytes per second for read speed warnings.
        max_files (int, optional): The maximum number of files to check.
        prefix (str, optional): Prefix string to add to log messages.

    Examples:
        >>> from pathlib import Path
        >>> image_files = list(Path("dataset/images").glob("*.jpg"))
        >>> check_file_speeds(image_files, threshold_ms=15)
    r   z%Image speed checks: No files to checkNi  rbi   z*Image speed checks: failed to access filesr?   )ddofz, size: i   .1fz KBzping:    ±z msz, read: z MB/s u   Fast image access ✅ ()zSlow image access detected (z). Use local storage instead of remote/mounted storage for better performance. See https://docs.ultralytics.com/guides/model-training-tips/)lenr   warningrandomsamplemintimeperf_counterrA   statst_sizeappendopenread	Exceptionnpmeanstdinfo)rJ   rK   rL   rM   rN   
ping_times
file_sizesread_speedsfstart	file_sizefile_obj_	read_timeavg_pingstd_pingsize_msgping_msg	avg_speed	std_speed	speed_msgs                        rH   check_file_speedsrw   2   sf   * CJ!O&!FGH MM%YE
!;<E JJK 	%%'E
**It002U:dBCi( %%'Ea $(MMO$))+e3I1}""9#89#DE$ &!KLM wwz"H-0_q-@rvvjq)aH"''*-9#>cBH~R~S9HGGK(	36{3Ca3GBFF;Q/Q	yoR	#eD		,)l":vh5hZ	{8*TUVWh28*YKz RK L	
9$ $
  		s+   BI,I=;II	I	I)(I)pathsc                 V   d}| D ]%  }	 |t        j                  |      j                  z  }' t	        d      j                  t        |      j                               }|j                  dj                  |       j                                |j                         S # t        $ r Y w xY w)z>Return a single hash value of a list of paths (files or dirs).r   hashlibrT   )rA   r]   r^   OSError
__import__sha256strencodeupdaterC   	hexdigest)rx   sizephs       rH   get_hashr      s    D 	BGGAJ&&&D
 	9$$SY%5%5%78AHHRWWU^""$%;;=	  		s   "B	B('B(imgc                     | j                   }| j                  dk(  r5	 | j                         x}r |j                  dd      }|dv r
|d   |d   f}|S |S # t        $ r Y |S w xY w)zReturn exif-corrected PIL size.JPEGi  N>         r?   r   )r   formatgetexifgetrb   )r   sexifrotations       rH   	exif_sizer      sz    A
zzV	{{}$t$88C.v%!ad
A H1H  	H	s   2A 	A A argsc                    | \  \  }}}d\  }}}	 t        j                  |      }|j                          t        |      }|d   |d   f}|d   dkD  |d   dkD  z  sJ d| d       |j                  j                         t        v sJ d|j                   dt                |j                  j                         d	v r|t        |d
      5 }	|	j                  dd       |	j                         dk7  rBt        j                  t        j                  |            j                  |ddd       | | d}ddd       d}||f|||fS # 1 sw Y   xY w# t        $ r}
d}| | d|
 }Y d}
~
-d}
~
ww xY w)zVerify one image.)r   r   rT   r?   r   	   image size  <10 pixelszInvalid image format . >   r#   r)   rP         r   d   subsamplingquality!: corrupt JPEG restored and savedN : ignoring corrupt image/label: )r   r`   verifyr   r   lowerIMG_FORMATSFORMATS_HELP_MSGseekra   r   exif_transposesaverb   )r   im_fileclsrN   nfncmsgimshaperj   es              rH   verify_imager      s   !NWcFKBCFZZ 
		"q58$a1qA.P+eWK0PP.yy K/h3HSUVfUg1hh/99??/gt$ Pr1668{*++EJJw,?@EEgvcdnqEr#HWI-NOC	P
  S>2r3&&P P  F	!A!EFs1   B9E 	A(E1
E EE 	E-E((E-c                    | \  }}}}}}}}dddddg df\  }	}
}}}}}	 t        j                  |      }|j                          t        |      }|d   |d   f}|d   dkD  |d   dkD  z  sJ d| d       |j                  j                         t        v sJ d|j                   d	t                |j                  j                         d
v r|t        |d      5 }|j                  dd       |j                         dk7  rBt        j                  t        j                  |            j                  |ddd       | | d}ddd       t        j                  j                  |      rd}
t        |d      5 }|j                         j!                         j#                         D cg c]  }t%        |      s|j'                           }}t)        d |D              r|st+        j,                  |D cg c]  }|d   	 c}t*        j.                        }|D cg c]:  }t+        j,                  |dd t*        j.                        j1                  dd      < }}t+        j2                  |j1                  dd      t5        |      fd      }t+        j,                  |t*        j.                        }ddd       t%              x}rg|rN|j6                  d   d||z  z   k(  sJ dd||z  z    d       |ddddf   j1                  d|      ddddf   }n5|j6                  d   dk(  sJ d|j6                  d    d       |ddddf   }|j9                         dk  sJ d||dkD             |j;                         dk\  sJ d ||dk             |r	d|dddf<   |dddf   j9                         }||k  sJ d!t=        |       d"| d#|dz
          t+        j>                  |dd$%      \  }}t%        |      |k  r||   }|r|D cg c]  }||   	 }}| | d&|t%        |      z
   d'}ngd}t+        j@                  d|rd||z  z   ndft*        j.                        }n3d}	t+        j@                  d|rd||z  z   ndft*        j.                        }|r|ddddf   j1                  d||      }|dk(  r_t+        jB                  |d(   dk  |d)   dk  z  d*d+      jE                  t*        j.                        }t+        j2                  ||d,   gd-      }|ddddf   }||||||	|
|||f
S # 1 sw Y   xY wc c}w c c}w c c}w # 1 sw Y   xY wc c}w # tF        $ r!}d}| | d.| }ddddd|	|
|||g
cY d}~S d}~ww xY w)/zVerify one image-label pair.r   rT   Nr?   r   r   r   zinvalid image format r   >   r#   r)   rP   r   r   r   r   r   r   r   utf-8encodingc              3   8   K   | ]  }t        |      d kD    yw)r   N)rV   .0rG   s     rH   	<genexpr>z%verify_image_label.<locals>.<genexpr>   s     .as1vz.s   dtype   zlabels require z columns eachzlabels require 5 columns, z columns detectedg)\(?z,non-normalized or out of bounds coordinates g{Gzznegative class labels zLabel class z exceeds dataset class count z. Possible class labels are 0-T)axisreturn_index: z duplicate labels removed).r   ).r?   g              ?).N)r   r   )$r   r`   r   r   r   r   r   r   r   ra   r   r   r   rA   pathisfilestrip
splitlinesrV   splitanyrc   arrayfloat32reshapeconcatenater    r   maxrZ   intuniquezeroswhereastyperb   )r   r   lb_filerN   keypointnum_clsnkptndim
single_clsnmr   ner   r   segments	keypointsr   r   rj   rG   lbclassesnlpointsmax_clsrn   ikpt_maskr   s                                rH   verify_image_labelr      s   JNGGWfhtZ/0!Q2r4/G,BBC9BCZZ 
		"q58$a1qA.P+eWK0PP.yy K/h3HSUVfUg1hh/99??/gt$ Pr1668{*++EJJw,?@EEgvcdnqEr#HWI-NOC	P 77>>'"Bg0 4A)*)9)D)D)FQA#a&aggiQQ.2.. hhb'9!'9LGZ\]UV12bjj A I I"a P]H]Q)?PXAY(Z\]^BXXb

34 W}r}88A;1td{?;oPQTX[_T_P_Naan=oo;12Y..r48BQB?F88A;!+h/I"((ST+Vg-hh+12YFzz|t+s/[\bcilpcp\q[r-ss+vvx5(S,B2b5j>BR*SS(  Bq!tHQT(,,.( "3w<.0MgY W33:Q;-A( yy!$?1q6B;AB9:#;AHQK#;#;#HWIRSV}<UVCXXqx1td{?QGrzzZB19q4$;!DBJJWB1ab5	))"dD9Iqy88Yv%6%:y?PST?T$UWZ\_`gghjhrhrsNNIx	7J+KRTU	2A2YE8YBBKKkP P R'9]	4 4< $<  C	!A!EdD$b"b"cBBCs   B9U A(T,?7U 6/U%T96T9)U1T>
=U?UAU/EU UDU ,T61U 9UUU 	V#U?9V?V
image_pathtxt_path	label_mapc                 2   ddl m} ddlm} t	        j
                  t        j                  |             }|j                  dd \  }}g }t        |d      5 }	|	D ]e  }
t        t        |
j                               \  }}}}}||dz  z
  |z  }||dz  z
  |z  }||z  }||z  }|j                  ||||t        |      f       g 	 ddd       |j                  d      \  }}|D ]  \  }}}}}t        d  ||d	      D              }|j!                  ||f||d|d
      }|j#                  |       d|d   z  d|d   z  z   d|d   z  z   }|j%                  ||dz
  ||   |dk  rdnd|        |j'                  |       |j)                          y# 1 sw Y   xY w)a  
    Visualize YOLO annotations (bounding boxes and class labels) on an image.

    This function reads an image and its corresponding annotation file in YOLO format, then
    draws bounding boxes around detected objects and labels them with their respective class names.
    The bounding box colors are assigned based on the class ID, and the text color is dynamically
    adjusted for readability, depending on the background color's luminance.

    Args:
        image_path (str): The path to the image file to annotate, and it can be in formats supported by PIL.
        txt_path (str): The path to the annotation file in YOLO format, that should contain one line per object.
        label_map (Dict[int, str]): A dictionary that maps class IDs (integers) to class labels (strings).

    Examples:
        >>> label_map = {0: "cat", 1: "dog", 2: "bird"}  # It should include all annotated classes details
        >>> visualize_image_annotations("path/to/image.jpg", "path/to/annotations.txt", label_map)
    r   N)colorsr   r   r   r?   c              3   &   K   | ]	  }|d z    yw)   N )r   cs     rH   r   z.visualize_image_annotations.<locals>.<genexpr>!  s     ;!a#g;s   Tnone)	linewidth	edgecolor	facecolorgz6?g,C?g]m{?r   g      ?whiteblack)colorbackgroundcolor)matplotlib.pyplotpyplotultralytics.utils.plottingr   rc   r   r   r`   r   mapfloatr   r_   r   subplotstuple	Rectangle	add_patchtextimshowshow)r   r   r   pltr   r   
img_height	img_widthannotationsfilelineclass_idx_centery_centerwidthheightrG   ywr   rn   axlabelr   rect	luminances                             rH   visualize_image_annotationsr     s   $ $1
((5::j)
*CIIbqMJ	K	h	) <T 	<D:=eTZZ\:R7Hh%EAI%2AFQJ&*4A	!A#A1aCM:;	<< LLOEAr( r1aE;veT':;;}}aVQQ%SY}Z
TU1X%q(99FU1X<MM	
1q5)E*Y_'RYkpqr IIcNHHJ!< <s   A+FFimgszpolygonsr   downsample_ratioc                 X   t        j                  | t         j                        }t        j                  |t         j                        }|j                  |j                  d   ddf      }t        j                  |||       | d   |z  | d   |z  }}t        j                  |||f      S )a  
    Convert a list of polygons to a binary mask of the specified image size.

    Args:
        imgsz (Tuple[int, int]): The size of the image as (height, width).
        polygons (List[np.ndarray]): A list of polygons. Each polygon is an array with shape (N, M), where
                                     N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int, optional): The color value to fill in the polygons on the mask.
        downsample_ratio (int, optional): Factor by which to downsample the mask.

    Returns:
        (np.ndarray): A binary mask of the specified image size with the polygons filled in.
    r   r   r   r   )r   r?   )
rc   r   uint8asarrayint32r   r   cv2fillPolyresize)r  r  r   r  masknhnws          rH   polygon2maskr  *  s      88E*Dzz("((3H!2B :;HLLxu-Ah**E!H8H,HB::dRH%%    c                     t        j                  |D cg c]   }t        | |j                  d      g||      " c}      S c c}w )a  
    Convert a list of polygons to a set of binary masks of the specified image size.

    Args:
        imgsz (Tuple[int, int]): The size of the image as (height, width).
        polygons (List[np.ndarray]): A list of polygons. Each polygon is an array with shape (N, M), where
                                     N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int): The color value to fill in the polygons on the masks.
        downsample_ratio (int, optional): Factor by which to downsample each mask.

    Returns:
        (np.ndarray): A set of binary masks of the specified image size with the polygons filled in.
    r   )rc   r   r  r   )r  r  r   r  rG   s        rH   polygons2masksr"  C  s9      88\deWX\%!))B-%AQReffes   %A r   c                    t        j                  | d   |z  | d   |z  ft        |      dkD  rt         j                  nt         j                        }g }g }t        t        |            D ]m  }t        | ||   j                  d      g|d      }|j                  |j                  |j                               |j                  |j                                o t        j                  |      }t        j                  |       }t        j                  |      |   }t        t        |            D ]-  }	||	   |	dz   z  }||z   }t        j                  |d|	dz         }/ ||fS )z!Return a (640, 640) overlap mask.r   r?   r   r   r   )r  r   )a_mina_max)rc   r   rV   r  r  ranger  r   r_   r   r   sumr  argsortr   clip)
r  r   r  masksareasmssir  indexr   s
             rH   polygons2masks_overlapr/  V  s?    HH	q%	%uQx3C'CDh-#-bhh288E E	BCM" !EHRL$8$8$<#=P`hij
		$++ekk*+TXXZ ! JJuEJJvE	"e	B3x=! 5!uAQa!e45 %<r   r   c                    t        | j                  d            xs t        | j                  d            }|sJ d| j                          d       t	        |      dkD  r)|D cg c]  }|j
                  | j
                  k(  s|  }}t	        |      dk(  s'J d| j                          dt	        |       d|        |d   S c c}w )	a  
    Find and return the YAML file associated with a Detect, Segment or Pose dataset.

    This function searches for a YAML file at the root level of the provided directory first, and if not found, it
    performs a recursive search. It prefers YAML files that have the same stem as the provided path.

    Args:
        path (Path): The directory path to search for the YAML file.

    Returns:
        (Path): The path of the found YAML file.
    z*.yamlzNo YAML file found in ''r?   zExpected 1 YAML file in 'z', but found z.
r   )listglobrglobresolverV   stem)r   rJ   rj   s      rH   find_dataset_yamlr7  n  s     8$%Cdjj.B)CE=+DLLN+;1==5
5zA~!9qQVVtyy%899u:?k77G}UXY^U_T``cdicjkk?8O :s   $CCdatasetautodownloadc           
      	   t        |       }d}t        j                  |      st        |      r3t	        |t
        dd      }t        t
        |z        }|j                  d}}t        j                  |d      dD ]S  }|vs|dk7  sdvrt        t        |  d	| d
            t        j                  d       j                  d      d<   U dvrdvrt        t        |  d            dv rDdv r@t        d         d   k7  r,t        t        |  dt        d          dd    d            dvr#t!        d         D cg c]  }d| 	 c}d<   nt        d         d<   t#        d         d<   j%                  dd      d<   t'        |xs8 j%                  d      xs% t'        j%                  dd            j                        }|j)                         s'|j+                         st
        |z  j-                         }|d<   dD ]  }j%                  |      st/        |   t0              rb||   z  j-                         }|j)                         s-|   j3                  d      r||   dd z  j-                         }t1        |      |<   |   D cg c]  }t1        ||z  j-                                 c}|<    fddD        \  }	}
|	rt/        |	t4              r|	n|	gD cg c]  }t'        |      j-                          }	}t7        d |	D              s{t9        |       }t        j:                  d       d| d|	D cg c]  }|j)                         r| c}d    d }|
r|rt        j                  |       n|d!t
         d"t<         d z  }t?        |      tA        j@                         }d}|
j3                  d#      r$|
jC                  d$      rt	        |
t
        d%       nN|
j3                  d&      r/t        j:                  d'|
 d(       tE        jF                  |
      }ntI        |
d)i       d*tK        tA        j@                         |z
  d+       d,}|d-v rd.| d/tM        d0t
               nd1| d2}
t        j:                  d3|
 d4       tO        tQ        d         rd5       S d6       S c c}w c c}w c c}w c c}w )7ar  
    Download, verify, and/or unzip a dataset if not found locally.

    This function checks the availability of a specified dataset, and if not found, it has the option to download and
    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
    resolves paths related to the dataset.

    Args:
        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
        autodownload (bool, optional): Whether to automatically download the dataset if not found.

    Returns:
        (Dict[str, Any]): Parsed dataset information and paths.
    rT   TFdirunzipdelete)append_filename)trainvalrA  
validation 'uE   :' key missing ❌.
'train' and 'val' are required in all data YAMLs.zBrenaming data YAML 'validation' key to 'val' to match YOLO format.namesr   uI    key missing ❌.
 either 'names' or 'nc' are required in all data YAMLs.z 'names' length z
 and 'nc: z' must match.class_channels   r   	yaml_file)r@  rA  testminivalz../Nc              3   @   K   | ]  }j                  |        y wN)r   )r   rG   datas     rH   r   z$check_det_dataset.<locals>.<genexpr>  s     7adhhqk7s   )rA  r   c              3   <   K   | ]  }|j                           y wrL  )existsr   s     rH   r   z$check_det_dataset.<locals>.<genexpr>  s     +!188:+s   z	Dataset 'z"' images not found, missing path 'r   r1  z%
Note dataset download directory is 'z'. You can update this in 'http.zip)urlr<  r>  bash zRunning z ...yaml(r?   zs)>   Nr   u   success ✅ z, saved to boldzfailure u    ❌zDataset download 
z	Arial.ttfzArial.Unicode.ttf))r   zipfile
is_zipfiler   r   r   r7  parentr   loadSyntaxErrorr   r   rW   poprV   r&  r   r   r   rO  is_absoluter5  
isinstancer~   
startswithr2  allr   rf   r   FileNotFoundErrorr[   endswithrA   systemexecroundr   r   r   )r8  r9  r  extract_dirnew_dirkr   r   rG   rA  r   namemtrdtrM  s                   @rH   check_det_datasetro    s    gD K$:d#3,d5Q !78$(KK\ 99T40D  1D=Ez\5!gYb+qrs  NN_`((<0DK1 dt4/&G9,v!wxyy$44<CW,>$t*,L&G9,<Sg=O<PPZ[_`d[eZffs!tuvvd/4T$Z/@A!6!AWg'T
%d7m4DMxx
A.D Ytxx/Y4b8Q3R3Y3YZD;;=!1!1!3t#,,. DL. G88A;$q'3'DG^,,.xxzd1g&8&8&?Q+446Aa&Q>B1gF3q1134FQG 8#67FC
2<S$2G3cUTQtAw TT+s++W%DKKOD6!CPSDf1[\[c[c[eQDfghDiCjjklA\q!=l^Kfgtfuuvww'**		AA||F#

6(:!dCg&hqc./IIaLQ'U499;?A./r2BRSW`R`,rd+hv|.L-MNhpqspttxfyAKK+A3b12htG}5{OK <OOKg B, G
 U Egs   S*#S= SS
0S
r   c                 v	   t        |       j                  d      rt        | t        dd      } n8t        |       j	                  d      rt        |       }t        |t        dd      } t        |       } | j                         r| nt        | z  j                         }|j                         st        j                  d       t        j                  d| d       t        j                         }t        |       d	k(  r#t        j                  d
t        dz   dd       nd|  d}t!        ||j"                         t        j                  dt        j                         |z
  ddt%        d|       d       |dz  }|j                         st        j                  d|        t'        |j)                  d            t'        |j)                  d            z   }|r8ddlm} t        j                  dt/        |       d        ||d      }|dz  }nt        j0                  d| d        |d!z  j3                         r|d!z  n1|d"z  j3                         r|d"z  n|d#z  j3                         r|d#z  nd$}	|d%z  j3                         r|d%z  nd$}
|d!k(  r|	st        j                  d&       |
}	n|d%k(  r|
st        j                  d'       |	}
t/        |dz  j5                  d(      D cg c]  }|j                         s| c}      }|dz  j7                         D cg c]  }|j                         s|j8                  ! }}t;        t=        t?        |                  }||	|
d)jA                         D ]%  \  }}t%        | d*       d+| d,}|t        j                  |       3|j)                  d-      D cg c](  }|jB                  d.d$ jE                         tF        v s'|* }}t/        |      }t/        |D ch c]  }|j"                   c}      }|dk(  r5|dk(  rtI        |  d/| d0      t        j                  | d1| d2| d3       ||k7  r&t        j0                  | d1| d2| d4| d5| d6
       t        j                  | d1| d2| d7       ( ||	|
||d8d9S c c}w c c}w c c}w c c}w ):a  
    Check a classification dataset such as Imagenet.

    This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information.
    If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally.

    Args:
        dataset (str | Path): The name of the dataset.
        split (str, optional): The split of the dataset. Either 'val', 'test', or ''.

    Returns:
        (Dict[str, Any]): A dictionary containing the following keys:

            - 'train' (Path): The directory path containing the training set of the dataset.
            - 'val' (Path): The directory path containing the validation set of the dataset.
            - 'test' (Path): The directory path containing the test set of the dataset.
            - 'nc' (int): The number of classes in the dataset.
            - 'names' (Dict[int, str]): A dictionary of class names in the dataset.
    )zhttp:/zhttps:/TFr;  )rQ  z.tarz.gzrT   z Dataset not found, missing path z, attempting download...imagenetrS  zdata/scripts/get_imagenet.sh)shellcheckz?https://github.com/ultralytics/assets/releases/download/v0.0.0/rQ  )r<  u   Dataset download success ✅ (rR   zs), saved to rV  rW  r@  z#Dataset 'split=train' not found at z*.jpgz*.pngr   )split_classify_datasetzFound z1 images in subdirectories. Attempting to split...g?)train_ratiozNo images found in z or its subdirectories.rA  rB  validNrI  z:Dataset 'split=val' not found, using 'split=test' instead.z:Dataset 'split=test' not found, using 'split=val' instead.*r@  rA  rI  : ...*.*r?   rC  z:' no training images foundz found z images in z classes (no images found)z classes (requires z classes, not rU   u    classes ✅ rG  )r@  rA  rI  r   rD  rF  )%r~   r`  r   r   rc  r   r   is_dirr5  r   rf   rW   r[   
subprocessrunr   r   rZ  r   r2  r4  ultralytics.data.splitrt  rV   errorrO  r3  iterdirrj  dict	enumeratesorteditemssuffixr   r   rb  )r8  r   r  data_dirrl  rR  	train_setimage_filesrt  val_settest_setrG   r   rD  ri  vrN   r   rJ   r   nds                        rH   check_cls_datasetr    sg   * 7|45\eT	W		6	7'",d5Q7mG">>+,2HRRTH??B9(C[\]IIKw<:%NNU4*H#H"IJRV^bcST[S\\`aCShoo.4TYY[1_S4IW_`fhpWqVrrtuv7"I<YKHI8>>'23d8>>';R6SSEKK&[!1 22cde-hCHH 7*ILL.xj8OPQ u$$& 	5 |#++- $ w&&(   &.%6$>$>$@x& dH~gST	&ST	(W,2237FA188:aF	GB&099;JqxxzQVVJEJ6%=)*E $GXFLLN P1s!W%&as#.9KK&'ggen_dAB8M8M8OS^8^T_E_UBe4ddkk45BQw<+wir!<W,XYYNNfXWRDB4Ga#bcrxwrd+bTATUWTXXfgifjjklmvhgbTRDNO!P$ wUZhijj/ GJ `4s*   (R'>R'R,4R,;(R1$R1>R6
c                   z    e Zd ZdZddededefdZedede	eeef   fd       Z
defd	Zdd
ededefdZdefdZy)HUBDatasetStatsaA  
    A class for generating HUB dataset JSON and `-hub` dataset directory.

    Args:
        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip).
        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'.
        autodownload (bool): Attempt to download dataset if not found locally.

    Attributes:
        task (str): Dataset task type.
        hub_dir (Path): Directory path for HUB dataset files.
        im_dir (Path): Directory path for compressed images.
        stats (Dict): Statistics dictionary containing dataset information.
        data (Dict): Dataset configuration data.

    Methods:
        get_json: Return dataset JSON for Ultralytics HUB.
        process_images: Compress images for Ultralytics HUB.

    Note:
        Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
        i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.

    Examples:
        >>> from ultralytics.data.utils import HUBDatasetStats
        >>> stats = HUBDatasetStats("path/to/coco8.zip", task="detect")  # detect dataset
        >>> stats = HUBDatasetStats("path/to/coco8-seg.zip", task="segment")  # segment dataset
        >>> stats = HUBDatasetStats("path/to/coco8-pose.zip", task="pose")  # pose dataset
        >>> stats = HUBDatasetStats("path/to/dota8.zip", task="obb")  # OBB dataset
        >>> stats = HUBDatasetStats("path/to/imagenet10.zip", task="classify")  # classification dataset
        >>> stats.get_json(save=True)
        >>> stats.process_images()
    r   taskr9  c                    t        |      j                         }t        j                  d| d       || _        | j                  dk(  rt        |      }t        |      }||d<   n`| j                  t        |            \  }}}	 t        j                  |      }d|d<   t        j                  ||       t        ||      }||d<   t        |d    d      | _        | j                  d	z  | _        t        |d
         t!        |d
   j#                               d| _        || _        y# t        $ r}	t        d      |	d}	~	ww xY w)zInitialize class.z Starting HUB dataset checks for z....classifyr   rT   zerror/HUB/dataset_stats/initNz-hubr=   rD  )r   rD  )r   r5  r   rf   r  r   r  _unzipr   r[  r   ro  rb   hub_dirim_dirrV   r2  valuesstatsrM  )
selfr   r  r9  	unzip_dirrM  rn   r  	yaml_pathr   s
             rH   __init__zHUBDatasetStats.__init__c  s,   Dz!!#6tfDAB	99
""4(I$Y/D$DL%)[[d%<"AxGyy+!V		)T*(LA'V tF|nD12llX-W.d7m>R>R>T9UV
	  G >?QFGs   AD' '	E0D<<Er;   c                     t        |       j                  d      sdd| fS t        | | j                        }|j	                         sJ d|  d| d       dt        |      t        |      fS )	zUnzip data.zip.rQ  FN)r   zError unzipping z, z6 not found. path/to/abc.zip MUST unzip to path/to/abc/T)r~   rc  r   rZ  r}  r7  )r   r  s     rH   r  zHUBDatasetStats._unzip~  sx     4y!!&)$$$t$++6	! 	
tfByk1gh	
! S^%6y%AAAr   rj   c                 \    t        || j                  t        |      j                  z         y)z)Save a compressed image for HUB previews.N)compress_one_imager  r   rj  )r  rj   s     rH   _hub_opszHUBDatasetStats._hub_ops  s    1dkkDGLL89r   r   verbosec                 R     fd}dD ]+  }d j                   |<    j                  j                  |      }|1t        |      j	                  d      D cg c](  }|j
                  dd j                         t        v s'|* }}|s j                  dk(  rddl	m
}  | j                  |         }	t        j                  t        |	j                              j                  t               }
|	j"                  D ]  }|
|d   xx   dz  cc<    t        |	      |
j%                         d	t        |	      d|
j%                         d
|	j"                  D cg c]  \  }}t        |      j&                  |i c}}d j                   |<   ddlm}  | j                  |    j                   j                        }	t        j,                  t/        |	j0                  t        |	      d      D cg c]J  }t        j2                  |d   j                  t               j5                          j                  d         L c}      }
t!        |
j7                               |
j7                  d      j%                         d	t        |	      t!        t        j8                  |
dk(  d      j7                               |
dkD  j7                  d      j%                         d
t;        |	j<                  |	j0                        D cg c]"  \  }}t        |      j&                   ||      i$ c}}d j                   |<   . |r j>                  jA                  dd        j>                  dz  }tC        jD                  d|jG                          d       tI        |dd      5 }tK        jL                   j                   |       ddd       |r5tC        jD                  tK        jN                   j                   dd              j                   S c c}w c c}}w c c}w c c}}w # 1 sw Y   bxY w)z(Return dataset JSON for Ultralytics HUB.c                    	j                   dk(  r| d   }n	j                   dv r!| d   D cg c]  }|j                          }}nm	j                   dk(  rE| d   j                  \  }}}t        j                  | d   | d   j                  |||z        fd      }nt        d	j                    d	      t        | d
   |      }|D cg c]  \  }}t        |d         gd |D         c}}S c c}w c c}}w )z:Update labels to integer class and 4 decimal place floats.detectbboxes>   obbsegmentr   poser   r?   zUndefined dataset task=r@   r   r   c              3   F   K   | ]  }t        t        |      d         yw)   N)rf  r   r   s     rH   r   z;HUBDatasetStats.get_json.<locals>._round.<locals>.<genexpr>  s     !E%a!"4!Es   !)	r  flattenr   rc   r   r   
ValueErrorzipr   )
r>   coordinatesrG   nnkr  zippedr   r   r  s
            rH   _roundz(HUBDatasetStats.get_json.<locals>._round  s    yyH$$X.004::4FGqqyy{GGf$";/55	2r nnfX.>{@S@[@[\]_adf_f@g-hjkl #:499+Q!GHH4FX^_91fS1YF!Ef!EF__ H `s   C-"C2rx  Nr|  r?   r  r   )ImageFolder)total	per_class)r  
unlabelledr  )instance_statsimage_statsr>   YOLODataset)img_pathrM  r  
Statisticsr  descr   r   )	minlengthTparentsexist_okz
stats.jsonzSaving r{  r  r   r   r   F)indent	sort_keys)(r  rM  r   r   r4  r  r   r   r  torchvision.datasetsr  rc   r   rV   r   r   r   imgstolistrj  ultralytics.datar  r   r   r>   bincountr  r'  ra  r  im_filesr  mkdirr   rf   r5  r`   jsondumpdumps)r  r   r  r  r   r   rj   rJ   r  r8  rG   r   ri  r  r  r  
stats_paths   `                rH   get_jsonzHUBDatasetStats.get_json  sT   	` , ,	E $DJJu99=='D | $T
 0 0 7_1188AB<;M;M;OS^;^Q_E_ yyJ&<%dii&67HHS1299#>!,, "BbeHMH" 14G188:&V-0\YZYaYaYc#d=D\\JTQQa0J%

5! 9%tyy/?diiVZV_V_`HH &*'..GS_%`! E%L$7$7$<$D$D$FRVR[R[\`Rab 14AEEG155QR8??K\&]!$W&)"&&a*;*?*?*A&B&'!e[[^%:%:%<$
 FIIYIY[b[i[iEjkTQQfQi8k%

5!I,	^ LLtd;4JKK'*"4"4"6!7s;<j#8 )A		$**a()KK

4::a5IJzza `" K l) )s+   (P?P!P.AP
;'P!PP&c                    ddl m} | j                  j                  dd       dD ]  }| j                  j                  |       || j                  |   | j                        }t        t              5 }t        |j                  | j                  |j                        t        |      | d	      D ]  } 	 ddd        t        j                  d
| j                          | j                  S # 1 sw Y   xY w)z$Compress images for Ultralytics HUB.r   r  Tr  rx  N)r  rM  z imagesr  zDone. All images saved to )r  r  r  r  rM  r   r   r   r   imapr  r  rV   r   rf   )r  r  r   r8  poolrn   s         rH   process_imageszHUBDatasetStats.process_images  s    0$6+ 	Eyy}}U#+!499U+;$))LGK( Ddiiw7G7GHPST[P\fkellsctu A 		 	0>?{{	 s   5AC22C;	N)z
coco8.yamlr  F)FF)__name__
__module____qualname____doc__r~   boolr  staticmethodr   r	   r  r  r   r  r  r   r   rH   r  r  @  s     DS s UY 6 BT BeD#tO4 B B:# :HT HD HT HT r   r  rj   f_newmax_dimr   c           	      "   	 dt         _        t        j                  |       }|j                  dv r|j	                  d      }|t        |j                  |j                        z  }|dk  r?|j                  t        |j                  |z        t        |j                  |z        f      }|j                  |xs | d|d       y# t        $ r}t        j                  d|  d	|        t        j                  |       }|j                   dd
 \  }}|t        ||      z  }|dk  r@t        j                  |t        ||z        t        ||z        ft        j"                        }t        j$                  t'        |xs |       |       Y d}~yd}~ww xY w)a;  
    Compress a single image file to reduced size while preserving its aspect ratio and quality using either the Python
    Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
    resized.

    Args:
        f (str): The path to the input image file.
        f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
        max_dim (int, optional): The maximum dimension (width or height) of the output image.
        quality (int, optional): The image compression quality as a percentage.

    Examples:
        >>> from pathlib import Path
        >>> from ultralytics.data.utils import compress_one_image
        >>> for f in Path("path/to/dataset").rglob("*.jpg"):
        >>>    compress_one_image(f)
    N>   LARGBARGBr   r   T)r   optimizezHUB ops PIL failure r   r   )interpolation)r   MAX_IMAGE_PIXELSr`   modeconvertr   r
  r	  r  r   r   rb   r   rW   r  imreadr   
INTER_AREAimwriter~   )	rj   r  r  r   r   rm  r   	im_heightim_widths	            rH   r  r    sG   $)!%ZZ]77n$E"Bc"))RXX..s7C1-s299q=/ABCB

FGdC )-aS1#67ZZ] hhrl	8c)X..s7BX\!2C	A4F GWZWeWefBC
OR(()s   B?C 	FB9F		Fc                     ddl }|j                          t        j                  t	        |       d      j                         }|j                          |S )z1Load an Ultralytics *.cache dictionary from path.r   NT)allow_pickle)gcdisablerc   r[  r~   itemenable)r   r  caches      rH   load_dataset_cache_filer    s9    JJLGGCID1668EIIKLr   rG   versionc                 x   ||d<   t        |j                        ro|j                         r|j                          t	        t        |      d      5 }t        j                  ||       ddd       t        j                  |  d|        yt        j                  |  d|j                   d       y# 1 sw Y   JxY w)z9Save an Ultralytics dataset *.cache dictionary x to path.r  wbNzNew cache created: zCache directory z# is not writeable, cache not saved.)r   rZ  rO  unlinkr`   r~   rc   r   r   rf   rW   )rN   r   rG   r  r  s        rH   save_dataset_cache_filer    s    AiL$;;=KKM#d)T" 	dGGD!	vh1$89&!1$++>abc		 	s   B00B9)
   2   r   rT   )r?   r?   )r?   )T)rT   )Ni  r  )Nr  rA   rX   r~  r[   rX  multiprocessing.poolr   pathlibr   tarfiler   typingr   r   r   r	   r
   r  numpyrc   PILr   r   ultralytics.nn.autobackendr   ultralytics.utilsr   r   r   r   r   r   r   r   r   r   r   r   ultralytics.utils.checksr   r   r   ultralytics.utils.downloadsr   r   r   ultralytics.utils.opsr    HELP_URLr   VID_FORMATSr~   getenvr   r8   r   rI   r   r   rw   r   r   r   r   r  ndarrayr  r"  r/  r7  r  ro  r  r  r  r  r  r   r   rH   <module>r
     sG    	     +   0 0 
   8    F E K K 0W_c<U34::<F
5k]*[MZ TtCy TT#Y T moK
9K
$)K
>CK
UXK
fiK
\
DI 
# 
5;; 5c? 'u ' '2GCU GCt GCT)C )3 )4PSUXPX> )Z ab&c?&&*2::&6&?B&Z]&ZZ&4 ]^gc?g&*2::&6g?BgVYgZZg( QRc?&*2::&6JM
2::rzz!"0D T *\s \$ \$sCx. \~[kuS$Y/ [k [kT#s(^ [k|d dN")# ")c ")3 ")PS ")J$ 4 
dC 
dt 
d 
ds 
dr   