
    hhy4                       d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZ dd	lmZm Z  dd
l!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ erddl,m-Z- ddl.m/Z/m0Z0  ej1        e2          Z3dZ4 G d de          Z5 G d de          Z6ddZ7d dZ8 G d de#e          Z9dS )!1Base class for all loaders that uses O365 Package    )annotationsN)abstractmethod)datetime)PathPurePath)TYPE_CHECKINGAnyDictIterableListOptionalSequenceUnion)	BaseModelFieldFilePathPrivateAttr	SecretStr)BaseSettingsSettingsConfigDict)BaseBlobParser
BaseLoader)FileSystemBlobLoader)Blob)MimeTypeBasedParser)
get_parser)Account)DriveFolderi  P c                  v    e Zd ZU  edd          Zded<    edd          Zded<    ed	d
dd          ZdS )_O365Settings.O365_CLIENT_ID)aliasstr	client_idO365_CLIENT_SECRETr   client_secretFz.env ignore)case_sensitiveenv_file
env_prefixextraN)	__name__
__module____qualname__r   r&   __annotations__r(   r   model_config     k/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain_community/document_loaders/base_o365.pyr"   r"   *   sr         U3&6777I7777$uS0DEEEMEEEE%%v"H  LLLr5   r"   c                  D    e Zd ZU  ej                    dz  dz  Zded<   dS )_O365TokenStorage.credentialszo365_token.txtr   
token_pathN)r/   r0   r1   r   homer:   r2   r4   r5   r6   r8   r8   3   s6         $49;;7:JJJJJJJJJr5   r8   
file_typesSequence[str]returnDict[str, str]c                z    i }| D ]5}t          j        d|           \  }}|r|||<   $t          d|           |S )2Fetch the mime types for the specified file types.zfile.zUnknown mimetype of extension )	mimetypes
guess_type
ValueError)r<   mime_types_mappingext	mime_type_s        r6   fetch_mime_typesrI   7   sg     E E +MCMM::	1 	E&/s##CcCCDDDr5   
mime_typesc                ~    i }| D ]7}t          j        |          }|r|||dd         <   &t          d|           |S )rA      NzUnknown mimetype )rB   guess_extensionrD   )rJ   rE   rG   rF   s       r6   fetch_extensionsrN   C   sb     > >	'	22 	>*3s122w''<<<===r5   c                  L    e Zd ZU dZ ee          Zded<   	 dZded<   	 e	Z
ded	<   	 dZded
<   	 dZded<   	 i Zded<   	  e            Zded<    e            Zded<    e            Zded<   d( fdZed)d            Zeed*d                        Zd+d!Zd,d%Zd-d'Z xZS ).O365BaseLoaderr   )default_factoryr"   settingsFboolauth_with_tokenzUnion[int, str]
chunk_size	recursiveNzOptional[datetime]modified_sincezOptional[Dict[str, Any]]handlersr   _blob_parserr=   _file_typesr?   _mime_typeskwargsr
   r>   Nonec                     t                      j        di |  j        r t           j                                                  }	 t          |           _        t          t          |                     _         fd j        	                                D             }n# t          $ ry 	 t          |           _        t          t           j                                                             _         j        }n!# t          $ r t          d| d          w xY wY nw xY wt          |d            _        d S t          d           _        t           j        t                    s$t!          dt#           j                             t          t           j        j                                                             _        d S )Nc                2    i | ]\  }}j         |         |S r4   r[   ).0	extensionhandlerselfs      r6   
<dictcomp>z+O365BaseLoader.__init__.<locals>.<dictcomp>   s7     ! ! !*	7 $Y/! ! !r5   z=`handlers` keys must be either file extensions or mimetypes.
zj could not be interpreted as either.
File extensions and mimetypes cannot mix. Use either one or the other)rX   fallback_parserdefaultzLget_parser("default) was supposed to return MimeTypeBasedParser.It returned r4   )super__init__rX   listkeysrI   r[   setrZ   itemsrD   rN   r   rY   r   
isinstance	TypeErrortype)rd   r\   handler_keysmime_handlers	__class__s   `   r6   ri   zO365BaseLoader.__init__   s   ""6"""= "	Y 2 2 4 455L#3L#A#A #'L(9(9#:#: ! ! ! !.2m.A.A.C.C! ! !    '7'E'ED$'+C0@0E0E0G0G,H,H'I'ID$$(MMM!   $6'6 6 6   "M !4&! ! !D !+9 5 5Dd/1DEE =#'(9#:#:= =    0T5F5O5T5T5V5V0W0WXXDs,   AB& &
D)1ADD)D##D)(D)c                    | j         S )zBReturn a dict of supported file types to corresponding mime types.r`   rd   s    r6   _fetch_mime_typesz O365BaseLoader._fetch_mime_types   s     r5   	List[str]c                    dS )zReturn required scopes.Nr4   ru   s    r6   _scopeszO365BaseLoader._scopes   s      r5   folderr    Iterable[Blob]c              #  (  K   | j         }|                                }i }t          j                    5 }t	          j        t          j                            |          d           |D ]2}|j        r'|j	        t          |                                          v r| j        r|j        | j        k    r|j        }t          j        d|j                  r4|j        j        dz   t$          j                            |j                  z   }|                    || j                   ||j	        t1          |j                  t1          |j                  t1          |j                  t1          |j                  |j        t1          |j                  d||j        <   4t=          |          }|                                D ]}}	tA          |	j        tB                    stE          d          |	j        rG|#                    t1          |	j        j                  i           }
|	j$        %                    |
           |	V  ~	 d	d	d	           n# 1 swxY w Y   | j&        r2|'                                D ]}| (                    |          E d	{V  d	S d	S )
a  Lazily load all files from a specified folder of the configured MIME type.

        Args:
            folder: The Folder instance from which the files are to be loaded. This
                Folder instance should represent a directory in a file system where the
                files are stored.

        Yields:
            An iterator that yields Blob instances, which are binary representations of
                the files loaded from the folder.
        T)exist_ok"Doc.aspx\?sourcedoc=.*file=([^&]+)/to_pathrU   sourcerG   createdmodified
created_bymodified_bydescriptionidpath#Expected blob path to be a PurePathN))rv   	get_itemstempfileTemporaryDirectoryosmakedirsr   dirnameis_filerG   rj   valuesrW   r   web_urlresearch_parenturllibparsequotenamedownloadrU   r%   r   r   r   r   	object_idr   yield_blobsrn   r   NotImplementedErrorgetmetadataupdaterV   get_child_folders_load_from_folder)rd   rz   file_mime_typesrm   metadata_dicttemp_dirfiler   loaderblobfile_metadata_	subfolders               r6   r   z O365BaseLoader._load_from_folder   s      0  ""35(** $	hK11DAAAA  < ~o.D.D.F.F)G)GGG $ 3  MD,???%)\F!y Et|    " %)L$8&)%*&,l&8&8&C&C%D !'
 !MM(tMWWW*0-1^+.t|+<+<,/,>,>.1$/.B.B/243C/D/D/3/?&)$.&9&9	8 	8M$)4 *x888F**,,  !$)X66 U-.STTT9 9%2%6%6s49>7J7JB%O%ONM((888



=$	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	J > 	=#5577 = =	11)<<<<<<<<<<	= 	== =s   HIIIdriver   
object_idsc              #  (  K   | j         }i }t          j                    5 }|D ]3}|                    |          }|st	          j        d| d| d           6|j        r|j        t          |	                                          v r|j
        }t          j        d|j
                  r4|j        j
        dz   t          j                            |j                  z   }|                    || j                   ||j        |j        |j        t-          |j                  t-          |j                  |j        t-          |j                  d||j        <   5t7          |          }	|	                                D ]}}
t;          |
j        t>                    stA          d	          |
j        rG|!                    t-          |
j        j                  i           }|
j"        #                    |           |
V  ~	 d
d
d
           d
S # 1 swxY w Y   d
S )a  Lazily load files specified by their object_ids from a drive.

        Load files into the system as binary large objects (Blobs) and return Iterable.

        Args:
            drive: The Drive instance from which the files are to be loaded. This Drive
                instance should represent a cloud storage service or similar storage
                system where the files are stored.
            object_ids: A list of object_id strings. Each object_id represents a unique
                identifier for a file in the drive.

        Yields:
            An iterator that yields Blob instances, which are binary representations of
            the files loaded from the drive using the specified object_ids.
        z!There isn't a file withobject_id z
 in drive .r~   r   r   r   r   r   N)$rv   r   r   get_itemloggingwarningr   rG   rj   r   r   r   r   r   r   r   r   r   r   rU   r   r   r%   r   r   r   r   r   r   rn   r   r   r   r   r   r   )rd   r   r   r   r   r   r   r   r   r   r   r   s               r6   _load_from_object_idsz$O365BaseLoader._load_from_object_ids   s     $ 035(** '	h'  	~~i00 OC%.C C:?C C C   < ~o.D.D.F.F)G)GGG!%9A4<   !% 4"%!&"(,"4"4TY"?"?!@ #
 h4?SSS&,)-'+|(,*-do*>*>+.t/?+@+@+/+;"%dn"5"5	4 	4di0 *x888F**,,  !$)X66 U-.STTT9 9%2%6%6s49>7J7JB%O%ONM((888



C'	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	s   GHHHr   c                   	 ddl m}m} n# t          $ r t          d          w xY w| j        rht                      }|j        } ||j        |j                  } |d
| j	        j
        | j	        j                                        f| j        |dddi}no |t          j                    dz  	          } |d
| j	        j
        | j	        j                                        f| j        |dddi}|                                 |S )znAuthenticates the OneDrive API client

        Returns:
            The authenticated Account object.
        r   )r   FileSystemTokenBackendzAO365 package not found, please install it with `pip install o365`)r:   token_filename)credentialsscopestoken_backendraise_http_errorsFr9   )r:   r4   )O365r   r   ImportErrorrT   r8   r:   parentr   rR   r&   r(   get_secret_valuery   r   r;   authenticate)rd   r   r   token_storager:   r   accounts          r6   _authzO365BaseLoader._auth$  s   	<<<<<<<<< 	 	 	S  	  	#-//M&1J22%,Z_  M g M+M/@@BB |+  '. GG 329;;7  M g M+M/@@BB |+  '. G   """s    %)r\   r
   r>   r]   )r>   r?   )r>   rw   )rz   r    r>   r{   )r   r   r   rw   r>   r{   )r>   r   )r/   r0   r1   __doc__r   r"   rR   r2   rT   
CHUNK_SIZErU   rV   rW   rX   r   rY   rZ   r[   ri   propertyrv   r   ry   r   r   r   __classcell__)rs   s   @r6   rP   rP   O   s        ;;#eMBBBHBBBB0!O!!!!I",J,,,,VI8)-N----)+H++++: $/;==L0000!,K...."-+--K////$Y $Y $Y $Y $Y $YL       X  & & & ^ X&6= 6= 6= 6=p; ; ; ;z* * * * * * * *r5   rP   )r<   r=   r>   r?   )rJ   r=   r>   r?   ):r   
__future__r   r   rB   r   r   r   r   abcr   r   pathlibr   r   typingr	   r
   r   r   r   r   r   r   pydanticr   r   r   r   r   pydantic_settingsr   r   )langchain_community.document_loaders.baser   r   =langchain_community.document_loaders.blob_loaders.file_systemr   8langchain_community.document_loaders.blob_loaders.schemar   4langchain_community.document_loaders.parsers.genericr   5langchain_community.document_loaders.parsers.registryr   r   r   
O365.driver   r    	getLoggerr/   loggerr   r"   r8   rI   rN   rP   r4   r5   r6   <module>r      s   7 7 " " " " " "      				 				               " " " " " " " " V V V V V V V V V V V V V V V V V V V V              ? > > > > > > > P P P P P P P P      J I I I I I T T T T T T L L L L L L )((((((((		8	$	$
    L   K K K K K K K K	 	 	 		 	 	 	    Z     r5   