
    h                         d Z ddlZddlmZmZmZmZmZ erddlm	Z	m
Z
 ddlmZ ddlmZ  ej                  e      Z G d de      Zy)	zRLoader that uses Selenium to load a page, then uses unstructured to load the html.    N)TYPE_CHECKINGListLiteralOptionalUnionChromeFirefox)Document)
BaseLoaderc                       e Zd ZdZdddddg fdee   deded   d	ee   d
ee   dedee   fdZ	de
d   fdZdede
d   defdZdee   fdZy)SeleniumURLLoadera  Load `HTML` pages with `Selenium` and parse with `Unstructured`.

    This is useful for loading pages that require javascript to render.

    Attributes:
        urls (List[str]): List of URLs to load.
        continue_on_failure (bool): If True, continue loading other URLs on failure.
        browser (str): The browser to use, either 'chrome' or 'firefox'.
        binary_location (Optional[str]): The location of the browser binary.
        executable_path (Optional[str]): The path to the browser executable.
        headless (bool): If True, the browser will run in headless mode.
        arguments [List[str]]: List of arguments to pass to the browser.
    TchromeNurlscontinue_on_failurebrowser)r   firefoxbinary_locationexecutable_pathheadless	argumentsc                     	 ddl }	 ddl}	|| _        || _        || _        || _        || _        || _        || _	        y# t        $ r t        d      w xY w# t        $ r t        d      w xY w)z4Load a list of URLs using Selenium and unstructured.r   NzIselenium package not found, please install it with `pip install selenium`zQunstructured package not found, please install it with `pip install unstructured`)
seleniumImportErrorunstructuredr   r   r   r   r   r   r   )
selfr   r   r   r   r   r   r   r   r   s
             o/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_loaders/url_selenium.py__init__zSeleniumURLLoader.__init__   s    		 	#6 .. ")  	) 	  	- 	s   = A AA*returnr   c                 B   | j                   j                         dk(  rddlm} ddlm} ddlm}  |       }| j                  D ]  }|j                  |        | j                  r"|j                  d       |j                  d       | j                  | j                  |_        | j                  	 ||      S  || || j                  	      
      S | j                   j                         dk(  rddlm} ddlm} ddlm}  |       }| j                  D ]  }|j                  |        | j                  r|j                  d       | j                  | j                  |_        | j                  	 ||      S  || || j                  	      
      S t!        d      )a  Create and return a WebDriver instance based on the specified browser.

        Raises:
            ValueError: If an invalid browser is specified.

        Returns:
            Union[Chrome, Firefox]: A WebDriver instance for the specified browser.
        r   r   )r	   )Options)Servicez
--headlessz--no-sandbox)options)r   )r#   servicer   )r
   z5Invalid browser specified. Use 'chrome' or 'firefox'.)r   lowerselenium.webdriverr	   !selenium.webdriver.chrome.optionsr!   !selenium.webdriver.chrome.servicer"   r   add_argumentr   r   r   r
   "selenium.webdriver.firefox.options"selenium.webdriver.firefox.service
ValueError)	r   r	   ChromeOptionsr"   chrome_optionsargr
   FirefoxOptionsfirefox_optionss	            r   _get_driverzSeleniumURLLoader._get_driverB   s{    <<8+1RA*_N~~ 1++C01 }}++L9++N;##/151E1E.##+n55&0D0DE  \\!Y.2TB,.O~~ 2,,S12 }},,\:##/262F2F/##+77'0D0DE 
 TUU    urldriverc                 ^   ddl m} ddlm} 	 |dddd}|j                  x}r||d<   	 |j                  |j                  d	      x}r|j                  d
      xs d|d<   	 |j                  |j                  d      x}r|j                  d      xs d|d<   |S # |$ r Y @w xY w# |$ r Y |S w xY w)Nr   )NoSuchElementException)ByzNo title found.zNo description found.zNo language found.)sourcetitledescriptionlanguager:   z//meta[@name="description"]contentr;   htmllangr<   )	selenium.common.exceptionsr7   selenium.webdriver.common.byr8   r:   find_elementXPATHget_attributeTAG_NAME)	r   r4   r5   r7   r8   metadatar:   r;   html_tags	            r   _build_metadataz!SeleniumURLLoader._build_metadataw   s    E3A&2,	
 LL 5  %HW	$117 {   --i8S<S '
	!..r{{FCCxC**62J6J $
  & 		 & 		s#   6B  6B# B B #B,+B,c           	         ddl m} t               }| j                         }| j                  D ]  }	 |j                  |       |j                  } ||      }dj                  |D cg c]  }t        |       c}      }| j                  ||      }	|j                  t        ||	              |j!                          |S c c}w # t        $ r4}
| j                  rt        j                  d| d|
        n|
Y d}
~
d}
~
ww xY w)	zLoad the specified URLs using Selenium and create Document instances.

        Returns:
            List[Document]: A list of Document instances with loaded content.
        r   )partition_html)textz

)page_contentrF   zError fetching or processing z, exception: N)unstructured.partition.htmlrJ   listr2   r   getpage_sourcejoinstrrH   appendr   	Exceptionr   loggererrorquit)r   rJ   docsr5   r4   rL   elementselrK   rF   es              r   loadzSeleniumURLLoader.load   s     	?#v!!#99 	C

3%11)|<{{h#?CG#?@//V<H$JK	 	 $@  ++LL#@]STRU!VWG Xs)   5C&C
85CC	D*C>>D)__name__
__module____qualname____doc__r   rR   boolr   r   r   r   r2   dictrH   r   r\    r3   r   r   r      s    " %)08)-)-!!#3i!# "!# ,-	!#
 "#!# "#!# !# 9!#F3VU#67 3Vj3 6I0J t >d8n r3   r   )r`   loggingtypingr   r   r   r   r   r&   r	   r
   langchain_core.documentsr   )langchain_community.document_loaders.baser   	getLoggerr]   rU   r   rc   r3   r   <module>ri      s=    X  @ @2 - @			8	$`
 `r3   