
    Nh#                     B   d dl Z d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlZ G d d      Zedk(  r`d	Zd d
lmZ  eddd      Z eeddddg      Zed    Zej/                  ed      \  ZZej5                  d        edd        ede       yy)    N)Path)Image)ListOptionalTupleUnion)YOLOc                   *   e Zd Z	 	 	 ddedededeee      fdZdej                  de	j                  fd	Zd
eeeej                  f   dee	j                  ef   fdZde	j                  fdZdd
eeeej                  f   dee   deeee   f   fdZy)
PredictionN
model_path
output_dirconf_threshallowed_labelsc                     t        |      | _        t        |      | _        | j                  j	                  dd       t        |      | _        || _        y)a  
        model_path: path to your YOLO .pt
        output_dir: where crops + blacked image are saved
        conf_thresh: minimum confidence threshold for saving crops
        allowed_labels: list of class names to accept (e.g. ["Figure","Table"]); None => accept all
        T)parentsexist_okN)r	   modelr   r   mkdirfloatr   r   )selfr   r   r   r   s        ,/var/www/html/eduruby.in/utils/get_layout.py__init__zPrediction.__init__   sH     *%
z*dT: -,    pil_imgreturnc                     t        j                  |j                  d            }t        j                  |t        j
                        }|S )z,Convert PIL image to OpenCV BGR numpy array.RGB)nparrayconvertcv2cvtColorCOLOR_RGB2BGR)r   r   rgbbgrs       r   _pil_to_bgrzPrediction._pil_to_bgr   s4    hhwu-.ll3 1 12
r   	img_inputc                 b   t        |t        j                        r9| j                  |      }dt        j                         j
                  dd  }||fS t        |      }|j                         st        d|       t        j                  |      }| j                  |      }||j                  fS )z
        Reads input which can be:
         - a PIL Image object
         - a path (str or Path) to an image file
        Returns: (img_bgr, base_name) where base_name is used for saved filenames.
        page_N   zImage path not found: )
isinstancer   r&   uuiduuid4hexr   existsFileNotFoundErroropenstem)r   r'   img_bgr	base_namepathpils         r   _read_image_bgrzPrediction._read_image_bgr$   s     i-&&y1G

 0 0! 456II%% I{{}#&<TF$CDDjj""3'		!!r   	image_rgbc                 `    | j                   j                  |d      }t        |      r|d   S dS )z
        Run YOLO on an RGB numpy array and return first result (ultralytics).
        We pass the numpy array directly to keep coordinate system consistent.
        F)sourceverboser   N)r   predictlen)r   r8   resultss      r   _predictzPrediction._predict9   s2    
 **$$Iu$E \wqz3t3r   img_namec                 H   | j                  |      \  }}|r|n|}|j                  dd \  }}t        j                  |t        j                        }| j                  |      }	|	$t        |	d      rt        |	j                        dk(  rt        j                  |      }
|
g fS g }d}|	j                  D ]I  }	 |j                  d   j                         }|D cg c]  }t        t!        |             c}\  }}}}t#        dt%        |dz
  |            }t#        dt%        |dz
  |            }t#        dt%        |dz
  |            }t#        dt%        |dz
  |            }||k  s||k  rd}d}	 t        |j&                  d   j)                               }	 t        |j*                  d   j)                               }|	j,                  |   }|| j.                  k  r*| j0                  || j0                  vrFd}t#        dt%        |dz
  ||z               }t#        dt%        |dz
  ||z
              }t#        dt%        |dz
  |d|z  z               }t#        dt%        |dz
  |d|z  z
              }||k  s||k  r|||||f   j3                         }|dz  }| d	| d
| d
t        |dz         d}| j4                  |z  }t        j6                  t9        |      |       |j;                  t9        |             L t        j                  |      }
|
|fS # t        $ r/ |j                  d   D cg c]  }t        |       nc c}w }}Y {w xY wc c}w # t        $ r/ 	 t        |j&                  d         }n# t        $ r d}Y nw xY wY w xY w# t        $ r d}Y w xY w)z
        img_input: PIL.Image or path to image
        img_name: optional base name for saved files (overrides path stem / random)
        Returns: (blacked_image_path, [crop_path1, crop_path2, ...])
        N   boxesr      g        obj
   _crop__d   z.png)r7   shaper!   r"   COLOR_BGR2RGBr?   hasattrr=   rC   r   	fromarrayxyxytolist	Exceptionr   introundmaxminconfitemclsnamesr   r   copyr   imwritestrappend)r   r'   r@   r3   	auto_stem	base_stemhwimg_rgbresultr   
crop_pathscrop_idxboxcoordsxcx1y1x2y2rU   labelclass_idcrop_paddingcrop	crop_name	crop_paths                               r   generatezPrediction.generateA   s    "11)< (Hi	}}Ra 1 ,,w(9(9: w' >!9S=NRS=S__W-GB; "
 << :	.C9!++-
 6<<c%(m<NBB QAE2'BQAE2'BQAE2'BQAE2'BRx28 DESXXa[--/0swwqz01X. d&&&"".5@S@S3SL QAE2#456BQAE2#456BQAE2,#678BQAE2,#678BRx28 2b5"R%<(--/DMH$+VH:QugQs48}oTRI)3IKKI-c)n-u:	.H )
""C  9,/HHQK8q%(8889 =   !-D  D  sl   6LM.&M5NM1MMM	N!M:9N:NNNNNN! N!)detected_crops?N)N)__name__
__module____qualname__r[   r   r   r   r   r   r   ndarrayr&   r   r   r   r7   r?   rs    r   r   r   r   
   s     + .2-- - 	-
 !c+-&5;; 2:: "sD%++/E)F "5QSQ[Q[]`Q`Ka "*4"** 4`#%T5;;(>"? `#8TW= `#dijmostwoxjxdy `#r   r   __main__zlayout_detector_model.pt)convert_from_pathzbook.pdf   zC:/poppler-23.05.0/Library/bin)dpipoppler_path	crops_outru   FigureTable)r   r   r   page_001)r@   zcrops_out/page_001_blacked.pngzBlacked image saved as:zCrops:)r!   numpyr   pathlibr   PILr   typingr   r   r   r   ultralyticsr	   r,   r   rv   
model_file	pdf2imager|   pagesppage_imgrs   blacked_imgcropssaveprintrz   r   r   <module>r      s    
    / /   W# W#z z+J , 6E 	 '*		A QxHHzBK 56	
#%EF	(E= r   