
    h                     N    d dl Z d dlmZmZmZmZmZ d dlmZm	Z	  G d de      Z
y)    N)AnyListOptionalSequenceUnion)BaseDocumentTransformerDocumentc                       e Zd ZdZ	 	 	 	 ddeeeee   f      deeeee   f      dedede	ddfd	Z
d
ee   de	dee   fdZy)MarkdownifyTransformeram  Converts HTML documents to Markdown format with customizable options for handling
    links, images, other tags and heading styles using the markdownify library.

    Arguments:
        strip: A list of tags to strip. This option can't be used with the convert option.
        convert: A list of tags to convert. This option can't be used with the strip option.
        autolinks: A boolean indicating whether the "automatic link" style should be used when a a tag's contents match its href. Defaults to True.
        heading_style: Defines how headings should be converted. Accepted values are ATX, ATX_CLOSED, SETEXT, and UNDERLINED (which is an alias for SETEXT). Defaults to ATX.
        kwargs: Additional options to pass to markdownify.

    Example:
        .. code-block:: python
            from langchain_community.document_transformers import MarkdownifyTransformer
            markdownify = MarkdownifyTransformer()
            docs_transform = markdownify.transform_documents(docs)

    More configuration options can be found at the markdownify GitHub page:
    https://github.com/matthewwithanm/python-markdownify
    Nstripconvert	autolinksheading_stylekwargsreturnc                     t        |t              r|gn|| _        t        |t              r|gn|| _        || _        || _        || _        y )N)
isinstancestrr   r   r   r   additional_options)selfr   r   r   r   r   s         s/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_transformers/markdownify.py__init__zMarkdownifyTransformer.__init__   sC     !+5# 6eWE
$.w$<y'"*"(    	documentsc           
         	 ddl m } g }|D ]  } |d
|j                  | j                  | j                  | j
                  | j                  d| j                  j                  dd      j                         }t        j                  dd|      }|j                  t        ||j                  	              |S # t        $ r t        d      w xY w)Nr   )markdownifyz`markdownify package not found, please 
                install it with `pip install markdownify`)htmlr   r   r   r        z\n\s*\nz

)metadata )r   ImportErrorpage_contentr   r   r   r   r   replaceresubappendr	   r    )r   r   r   r   converted_documentsdocmarkdown_contentcleaned_markdowns           r   transform_documentsz*MarkdownifyTransformer.transform_documents*   s    
	/ ! 	C ))** LL"nn"&"4"4 -- %   "vvj&:JK&&)CLLA!	( #"7  	= 	s   B8 8C)NNTATX)__name__
__module____qualname____doc__r   r   r   r   boolr   r   r   r	   r,   r!   r   r   r   r      s    , 2637")c49n-.) %T#Y/0) 	)
 ) ) 
)"#H%"# "# 
(		"#r   r   )r%   typingr   r   r   r   r   langchain_core.documentsr   r	   r   r!   r   r   <module>r5      s     	 7 7 FE#4 E#r   