
    h                         d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ  G d d	ee      Zy)
    N)ABC)Path)IteratorListSetTuple)Document)BaseBlobParser)Blobc                       e Zd ZdZdedee   fdZdedee   fdZde	j                  dedeeeeef      fdZd	ede	j                  d
ee   dee   dee   f
dZy)
VsdxParserzParser for vsdx files.blobreturnc                 $    | j                  |      S )zParse a vsdx file.)
lazy_parse)selfr   s     o/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_loaders/parsers/vsdx.pyparsezVsdxParser.parse   s    t$$    c              #   h  K   |j                         5 }t        j                  |d      5 }| j                  ||j                        }ddd       ddd       D cg c]!  \  }}}t        ||j                  ||d      # c}}}E d{    y# 1 sw Y   KxY w# 1 sw Y   OxY wc c}}}w 7 $w)zoRetrieve the contents of pages from a .vsdx file
        and insert them into documents, one document per page.rN)sourcepage	page_name)page_contentmetadata)as_bytes_iozipfileZipFileget_pages_contentr   r	   )r   r   pdf_file_objzfilepagespage_numberr   r   s           r   r   zVsdxParser.lazy_parse   s       	C<s3 Cu..udkkBC	C 9>

 

 5Y )"kk'!*

 
	
 
	
C C	C 	C

 
	
sK   B2BBBB2&B)B2B0B2B	BB&"B2r"   r   c                    	 ddl }d|j                         vrt        dj	                  |             yd|j                         vrt        dj	                  |             yd|j                         vrt        d	j	                  |             y|j                  |j                  d            }|j                  |j                  d            }|j                  |j                  d            }t        |d
   d   t              r'|d
   d   D cg c]  }|d   j                          }}n|d
   d   d   j                         g}t        |d   d   t              r|d   d   D cg c]
  }d|d   z    }	}nd|d   d   d   z   g}	|d   d   d   d   dt        |       }
|
D cg c]  }|j                          }
}|
D cg c]$  }|	|j                  |j                                  & }}g }|D ]  }|j                  |      }t        j                  |j                  |            }t        j                  d|      }t        |      dkD  s]dj!                  |      }ddddddd}|j#                         D ]  \  }}|j%                  ||      } |j'                  ||d        |D cg c]e  }dt)        |      j*                   d|j                         v r:||j                  |j                  dt)        |      j*                   d            d g }}g }t-        t/        ||
            D ]u  \  }\  }}| j1                  ||||      }dj!                  |D cg c]  }|d!   |v r|d"    c}|D cg c]  }|d!   |k(  r|d"    c}z         }|j'                  |||f       w |S # t        $ r t        d      w xY wc c}w c c}w c c}w c c}w c c}w c c}w c c}w )#a  Get the content of the pages of a vsdx file.

        Attributes:
            zfile (zipfile.ZipFile): The vsdx file under zip format.
            source (str): The path of the vsdx file.

        Returns:
            list[tuple[int, str, str]]: A list of tuples containing the page number,
            the name of the page and the content of the page
            for each page of the vsdx file.
        r   NzfThe xmltodict library is required to parse vsdx files. Please install it with `pip install xmltodict`.zvisio/pages/pages.xmlz'WARNING - No pages.xml file found in {}z visio/pages/_rels/pages.xml.relsz,WARNING - No pages.xml.rels file found in {}zdocProps/app.xmlz%WARNING - No app.xml file found in {}PagesPagez@NameRelationshipsRelationshipzvisio/pages/@Target
PropertiesTitlesOfPartsz	vt:vectorzvt:lpstrz("#text"\s*:\s*"([^\\"]*(?:\\.[^\\"]*)*)"
	-'   é   ô)z\nz\tz\u2013z\u2019z\u00e9rz\u00f4me)r   r   zvisio/pages/_rels/z	.xml.rels)pathcontentr   r   )	xmltodictImportErrornamelistprintformatr   read
isinstanceliststriplenindexjsondumpsrefindalljoinitemsreplaceappendr   stem	enumeratezipget_relationships)r   r"   r   r5   pagesxml_contentappxml_contentpagesxmlrels_contentreldisordered_namesdisordered_pathsordered_namesnameordered_pathsdisordered_pagesr3   r4   string_contentsamplesr   map_symboleskeyvalue	page_pathpagexml_relsordered_pagesr$   r   relationshipspage_s                                r   r    zVsdxParser.get_pages_content(   s   	 #%..*::;BB6JK-U^^5EE@GGOPU^^%559@@HI!*<S1T!U(uzz:L/MN%.__JJ9:&
 &w/7>0@0I&0Q+),G""$+ +
 !)&1':@@B+ *?;NKTR 0@P+ Y/+ + &7G	RS+ $2,#?#P$

$-,-$/ 3@@$@@ &
 -33DJJLAB
 
 ! 	VDjj&G!ZZ	(@ANjj;^G 7|a#yy1"" $!%  #/"4"4"6 DJC#/#7#7U#CLD !''|(TU)	V< +	
 #DO$8$8#9Cu~~GWW "$??JJ!3DO4H4H3IST	
 	
 57.7}-/
 	I*K*$	 !22e]LM  99 "2V}5 .) "2V}, .)	L   +y,!GH%	I( W  	B 	.++ A
<	
(
s6   N' N?$O#O	 )O*A*OO
2O'N<r   filelistr\   c           	         t              j                  }t              j                  }|d| dz  }t        |      |j	                         vr
t               S t        fd|D              }t        |d   d   t              r|d   d   D 	cg c]  }	|	d   	 }
}	n|d   d   d   g}
t        |
D cg c]  }t        ||z         c}      j                  |      }|D ]  }	|| j                  |	|||      z  } |S c c}	w c c}w )a  Get the relationships of a page and the relationships of its relationships,
        etc... recursively.
        Pages are based on other pages (ex: background page),
        so we need to get all the relationships to get all the content of a single page.
        z_rels/z.relsc              3   :   K   | ]  }|d    k(  s|d     yw)r3   r4   N ).0r_   r   s     r   	<genexpr>z/VsdxParser.get_relationships.<locals>.<genexpr>   s&      $
!&%-4:OE)$
s   
r(   r)   r*   )r   rS   parentstrr7   setnextr;   r<   intersectionrK   )r   r   r"   r`   r\   	name_pathparent_path	rels_pathpagexml_rels_contentrO   targetstargetr^   s    `           r   rK   zVsdxParser.get_relationships   s5    JOO	4j''F9+U";;	y>!115L# $
*6$
  
 *?;NKTR 0@P IG 
 ,O<^LYWXG5<=6Sv%&=

,x
  	 ! 	C)D,B,BUHl- M	
 ! >s   C=2DN)__name__
__module____qualname____doc__r   r   r	   r   r   r   r   rg   r   r   intr    dictr   rK   rc   r   r   r   r      s     %$ %8H#5 %
t 
(: 
(|__|.1|	eCcM"	#||)) ) s)	)
 4j) 
S)r   r   )r@   rB   r   abcr   pathlibr   typingr   r   r   r   %langchain_community.docstore.documentr	   )langchain_community.document_loaders.baser
   1langchain_community.document_loaders.blob_loadersr   r   rc   r   r   <module>r}      s3     	    - - : D BB Br   