
    hy4                       d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZ dd	lmZm Z  dd
l!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ erddl,m-Z- ddl.m/Z/m0Z0  ejb                  e2      Z3dZ4 G d de      Z5 G d de      Z6ddZ7ddZ8 G d de#e      Z9y)1Base class for all loaders that uses O365 Package    )annotationsN)abstractmethod)datetime)PathPurePath)TYPE_CHECKINGAnyDictIterableListOptionalSequenceUnion)	BaseModelFieldFilePathPrivateAttr	SecretStr)BaseSettingsSettingsConfigDict)BaseBlobParser
BaseLoader)FileSystemBlobLoader)Blob)MimeTypeBasedParser)
get_parser)Account)DriveFolderi  P c                  b    e Zd ZU  edd      Zded<    edd      Zded<    ed	d
dd      Zy)_O365Settings.O365_CLIENT_ID)aliasstr	client_idO365_CLIENT_SECRETr   client_secretFz.env ignore)case_sensitiveenv_file
env_prefixextraN)	__name__
__module____qualname__r   r&   __annotations__r(   r   model_config     l/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/document_loaders/base_o365.pyr"   r"   *   s9    3&67Is7$S0DEM9E%v"HLr5   r"   c                  F    e Zd ZU  ej                         dz  dz  Zded<   y)_O365TokenStorage.credentialszo365_token.txtr   
token_pathN)r/   r0   r1   r   homer:   r2   r4   r5   r6   r8   r8   3   s    $499;7:JJJJr5   r8   c                x    i }| D ]2  }t        j                  d|       \  }}|r|||<   &t        d|        |S )2Fetch the mime types for the specified file types.zfile.zUnknown mimetype of extension )	mimetypes
guess_type
ValueError)
file_typesmime_types_mappingext	mime_type_s        r6   fetch_mime_typesrF   7   s\     E ++eC5M:	1&/s#=cUCDDE r5   c                r    i }| D ]/  }t        j                  |      }|r	|||dd <   #t        d|        |S )r=      NzUnknown mimetype )r>   guess_extensionr@   )
mime_typesrB   rD   rC   s       r6   fetch_extensionsrK   C   sV     >	''	2*3s12w'0<==> r5   c                  .    e Zd ZU dZ ee      Zded<   	 dZded<   	 e	Z
ded	<   	 dZded
<   	 dZded<   	 i Zded<   	  e       Zded<    e       Zded<    e       Zded<   d fdZedd       Zeedd              ZddZ	 	 	 	 	 	 d dZd!dZ xZS )"O365BaseLoaderr   )default_factoryr"   settingsFboolauth_with_tokenzUnion[int, str]
chunk_size	recursiveNzOptional[datetime]modified_sincezOptional[Dict[str, Any]]handlersr   _blob_parserSequence[str]_file_typesDict[str, str]_mime_typesc                ~   t        |   di | | j                  rt        | j                  j	                               }	 t        |      | _        t        t        |            | _        | j                  j                         D ci c]  \  }}| j                  |   | }}}t        |d       | _        y t        d      | _        t        | j                  t              s!t!        dt#        | j                               t        t        | j                  j                  j	                                     | _        y c c}}w # t        $ ro 	 t        |      | _        t        t        | j                  j	                                     | _        | j                  }n# t        $ r t        d| d      w xY wY w xY w)Nz=`handlers` keys must be either file extensions or mimetypes.
zj could not be interpreted as either.
File extensions and mimetypes cannot mix. Use either one or the other)rU   fallback_parserdefaultzLget_parser("default) was supposed to return MimeTypeBasedParser.It returned r4   )super__init__rU   listkeysrF   rZ   setrX   itemsr@   rK   r   rV   r   
isinstance	TypeErrortype)selfkwargshandler_keys	extensionhandlermime_handlers	__class__s         r6   r_   zO365BaseLoader.__init__   s   "6"== 2 2 45L#3L#A #'L(9#:  /3mm.A.A.C!*	7 $$Y/8! !$ !4&!D !+9 5Dd//1DE##'(9(9#:";=   0T5F5F5O5O5T5T5V0WXD9!  '7'ED$'+C0@0@0E0E0G,H'ID$$(MMM! $X'. )66  "s>   AE D>!E >E 	F<AFF<F55F<;F<c                    | j                   S )zBReturn a dict of supported file types to corresponding mime types.)rZ   rg   s    r6   _fetch_mime_typesz O365BaseLoader._fetch_mime_types   s     r5   c                     y)zReturn required scopes.Nr4   ro   s    r6   _scopeszO365BaseLoader._scopes   s    r5   c              #    K   | j                   }|j                         }i }t        j                         5 }t	        j
                  t        j                  j                  |      d       |D ]s  }|j                  s|j                  t        |j                               v s7| j                  r|j                  | j                  kD  s]|j                  }t        j                   d|j                        rC|j"                  j                  dz   t$        j&                  j)                  |j*                        z   }|j-                  || j.                         ||j                  t1        |j2                        t1        |j                        t1        |j4                        t1        |j6                        |j8                  t1        |j:                        d||j*                  <   v t=        |      }|j?                         D ]  }	tA        |	j                  tB              stE        d      |	j                  rJ|jG                  t1        |	j                  j*                        i       }
|	jH                  jK                  |
       |	  	 d	d	d	       | jL                  r/|jO                         D ]  }| jQ                  |      E d	{     y	y	# 1 sw Y   ExY w7 w)
a  Lazily load all files from a specified folder of the configured MIME type.

        Args:
            folder: The Folder instance from which the files are to be loaded. This
                Folder instance should represent a directory in a file system where the
                files are stored.

        Yields:
            An iterator that yields Blob instances, which are binary representations of
                the files loaded from the folder.
        T)exist_ok"Doc.aspx\?sourcedoc=.*file=([^&]+)/to_pathrR   sourcerD   createdmodified
created_bymodified_bydescriptionidpath#Expected blob path to be a PurePathN))rp   	get_itemstempfileTemporaryDirectoryosmakedirsr   dirnameis_filerD   r`   valuesrT   r|   web_urlresearch_parenturllibparsequotenamedownloadrR   r%   r{   r}   r~   r   	object_idr   yield_blobsrd   r   NotImplementedErrorgetmetadataupdaterS   get_child_folders_load_from_folder)rg   folderfile_mime_typesrc   metadata_dicttemp_dirfilerz   loaderblobfile_metadata_	subfolders               r6   r   z O365BaseLoader._load_from_folder   s9     00  "35((* $	hKK1DA <<~~o.D.D.F)GG $ 3 3 MMD,?,??%)\\F!yy Et||  %)LL$8$8&)%*&,ll&8&8&C%D !'
 !MM(tMW*0-1^^+.t||+<,/,>.1$//.B/243C3C/D/3/?/?&)$..&9	8M$))4!6 *x8F**, !$))X6-.STT99%2%6%6s499>>7JB%ONMM((8
=$	J >>#557 =	11)<<<= K$	 $	N =s<   3KAK=%K#%K	F:K;K?K KKKc              #    K   | j                   }i }t        j                         5 }|D ]k  }|j                  |      }|st	        j
                  d| d| d       4|j                  sA|j                  t        |j                               v sg|j                  }t        j                  d|j                        rC|j                  j                  dz   t        j                  j!                  |j"                        z   }|j%                  || j&                         ||j                  |j(                  |j*                  t-        |j.                        t-        |j0                        |j2                  t-        |j4                        d||j"                  <   n t7        |      }	|	j9                         D ]  }
t;        |
j<                  t>              stA        d	      |
j<                  rJ|jC                  t-        |
j<                  j"                        i       }|
jD                  jG                  |       |
  	 d
d
d
       y
# 1 sw Y   y
xY ww)a  Lazily load files specified by their object_ids from a drive.

        Load files into the system as binary large objects (Blobs) and return Iterable.

        Args:
            drive: The Drive instance from which the files are to be loaded. This Drive
                instance should represent a cloud storage service or similar storage
                system where the files are stored.
            object_ids: A list of object_id strings. Each object_id represents a unique
                identifier for a file in the drive.

        Yields:
            An iterator that yields Blob instances, which are binary representations of
            the files loaded from the drive using the specified object_ids.
        z!There isn't a file withobject_id z
 in drive .ru   rv   rw   ry   r   r   N)$rp   r   r   get_itemloggingwarningr   rD   r`   r   r   r   r   r   r   r   r   r   r   rR   r{   r|   r%   r}   r~   r   r   r   r   rd   r   r   r   r   r   r   )rg   drive
object_idsr   r   r   r   r   rz   r   r   r   s               r6   _load_from_object_idsz$O365BaseLoader._load_from_object_ids   s    $ 0035((* '	h' 	~~i0OO%%.Kz%C <<~~o.D.D.F)GG!%99A4<< !% 4 4"%!&"(,,"4"4TYY"?!@ #
 h4??S&,)-'+||(,*-doo*>+.t/?/?+@+/+;+;"%dnn"5	4dii0)> *x8F**, !$))X6-.STT99%2%6%6s499>>7JB%ONMM((8
C'	 '	 '	s*   #IAI)%IF(I8	II
Ic                h   	 ddl m}m} | j                  rt               }|j                  } ||j                  |j                        } |d
| j                  j                  | j                  j                  j                         f| j                  |dddi}|S  |t        j                         dz  	      } |d
| j                  j                  | j                  j                  j                         f| j                  |dddi}|j!                          |S # t        $ r t        d      w xY w)znAuthenticates the OneDrive API client

        Returns:
            The authenticated Account object.
        r   )r   FileSystemTokenBackendzAO365 package not found, please install it with `pip install o365`)r:   token_filename)credentialsscopestoken_backendraise_http_errorsFr9   )r:   r4   )O365r   r   ImportErrorrQ   r8   r:   parentr   rO   r&   r(   get_secret_valuerr   r   r;   authenticate)rg   r   r   token_storager:   r   accounts          r6   _authzO365BaseLoader._auth$  s?   	<
 -/M&11J2%,,Z__M  MM++MM//@@B ||+ '.G0  399;7M  MM++MM//@@B ||+ '.G   "E  	S 	s   D D1)rh   r
   returnNone)r   rY   )r   	List[str])r   r    r   Iterable[Blob])r   r   r   r   r   r   )r   r   )r/   r0   r1   __doc__r   r"   rO   r2   rQ   
CHUNK_SIZErR   rS   rT   rU   r   rV   rX   rZ   r_   propertyrp   r   rr   r   r   r   __classcell__)rm   s   @r6   rM   rM   O   s    ;#MBHmB0!OT!I",J,VIt8)-N&-)+H&+: $/=L.0!,K."--K/$YL     &  &6=p;;(1;	;z*r5   rM   )rA   rW   r   rY   )rJ   rW   r   rY   ):r   
__future__r   r   r>   r   r   r   r   abcr   r   pathlibr   r   typingr	   r
   r   r   r   r   r   r   pydanticr   r   r   r   r   pydantic_settingsr   r   )langchain_community.document_loaders.baser   r   =langchain_community.document_loaders.blob_loaders.file_systemr   8langchain_community.document_loaders.blob_loaders.schemar   4langchain_community.document_loaders.parsers.genericr   5langchain_community.document_loaders.parsers.registryr   r   r   
O365.driver   r    	getLoggerr/   loggerr   r"   r8   rF   rK   rM   r4   r5   r6   <module>r      s    7 "   	 	     " V V V  ? P J T L(			8	$
L K K		Z r5   