
    hk                         d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
  ej                  d      Z ej                  d      Z ej                  d      Z G d d	e
      Zy)
    N)ListTuple)Document)
BaseLoaderzBV\w+zav[0-9]+zp=(\d+)c            	       b    e Zd ZdZ	 	 	 ddee   dededefdZdee   fdZd	ede	ee
f   fd
Zy)BiliBiliLoaderz9
    Load fetching transcripts from BiliBili videos.
    
video_urlssessdatabili_jctbuvid3c                     || _         d| _        	 ddlm} |r |r|r|j                  |||      | _        yyyy# t        $ r t	        d      w xY w)a  
        Initialize the loader with BiliBili video URLs and authentication cookies.
        if no authentication cookies are provided, the loader can't get transcripts
        and will only fetch videos info.

        Args:
            video_urls (List[str]): List of BiliBili video URLs.
            sessdata (str): SESSDATA cookie value for authentication.
            bili_jct (str): BILI_JCT cookie value for authentication.
            buvid3 (str): BUVI3 cookie value for authentication.
        Nr   )videoTrequests package not found, please install it with `pip install bilibili-api-python`)r
   r   r   )r	   
credentialbilibili_apir   ImportError
Credential)selfr	   r
   r   r   r   s         k/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_loaders/bilibili.py__init__zBiliBiliLoader.__init__   sm    $ %	* V#..!HV / DO &,8  	4 	s	   9 Areturnc                     g }| j                   D ]4  }| j                  |      \  }}t        ||      }|j                  |       6 |S )z
        Load and return a list of documents containing video transcripts.

        Returns:
            List[Document]: List of Document objects transcripts and metadata.
        )page_contentmetadata)r	   _get_bilibili_subs_and_infor   append)r   resultsurl
transcript
video_infodocs         r   loadzBiliBiliLoader.load6   sP     ?? 	 C%)%E%Ec%J"J

ZHCNN3	 
     r   c                    t         j                  |      }	 ddlm}m} |r,|j                  |j                         | j                        }n]t        j                  |      }|r8|j                  t        |j                         dd       | j                        }nt        d|        ||j                               }|j                  d	|i       | j                  sd
|fS d}t        j                  |      }	|	r'|d   t        |	j                  d            dz
     d   }n|d   } ||j                  |            }
|
j!                  dg       }|r|d   j!                  dd
      }|j#                  d      sd|z   }t%        j                   |      }|j&                  dk(  rft)        j*                  |j,                        j!                  dg       }dj/                  |D cg c]  }|d   	 c}      }d|d    d|d    d| }||fS t1        j2                  d| d|j&                          d
|fS t1        j2                  d| d       d
|fS # t
        $ r t        d      w xY wc c}w )zU
        Retrieve video information and transcript for a given BiliBili URL.
        r   )syncr   r   )bvidr      N)aidr   z(Unable to find a valid video ID in URL: r    pages   cid	subtitlessubtitle_urlhttpzhttps:   body contentzVideo Title: titlez, description: descz

Transcript: zFailed to fetch subtitles for z. HTTP Status Code: zNo subtitles found for video: z. Returning empty transcript.)
BV_PATTERNsearchr   r%   r   r   Videogroupr   
AV_PATTERNint
ValueErrorget_infoupdatePAGE_INDEX_PATTERNget_subtitleget
startswithrequestsstatus_codejsonloadsr3   joinwarningswarn)r   r   r&   r%   r   vr(   r    r,   
page_matchsubsub_listsub_urlresponseraw_sub_titlescraw_transcriptraw_transcript_with_meta_infos                     r   r   z*BiliBiliLoader._get_bilibili_subs_and_infoE   s      %	0 $//JA##C(CKKC		AB$8T__KU #KC5!QRR!**,'
5#,' z>!'..s3
W%c**:*:1*=&>&BCC U#C 1>>#&'77;+qkoonb9G%%f-"W,||G,H##s*!%H,<,<!=!A!A&"!M!$*PA1Y<*P!Q $Jw$7#8 9$$.v$6#7 8##1"24 .
 5j@@4SE :))1)=)=(>@ :~ MM05RS
 :~y  	4 	R +Qs   I  I&I#N)r)   r)   r)   )__name__
__module____qualname____doc__r   strr   r   r"   r   dictr    r#   r   r   r      sq     I  	
 @d8n Cs CuS$Y7G Cr#   r   )rE   rerH   typingr   r   rC   langchain_core.documentsr   )langchain_community.document_loaders.baser   compiler6   r:   r?   r   rZ   r#   r   <module>r`      s[     	    - @ RZZ!
RZZ$
RZZ
+ wZ wr#   