
    hh                        d dl mZ d dlmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ erd dlmZ  G d d	e
          Zd
S )    )annotations)TYPE_CHECKINGIteratorListOptional)Document)
BaseLoader)Blob)!AzureAIDocumentIntelligenceParser)TokenCredentialc                  :    e Zd ZdZ	 	 	 	 	 	 	 ddddddZddZdS )!AzureAIDocumentIntelligenceLoaderz,Load a PDF with Azure Document Intelligence.Nprebuilt-layoutmarkdown)analysis_featuresazure_credentialapi_endpointstrapi_keyOptional[str]	file_pathurl_pathbytes_sourceOptional[bytes]api_version	api_modelmoder   Optional[List[str]]r   Optional['TokenCredential']returnNonec	          	         |||
J d            ||

J d            ||

J d            || _         || _        || _        t          ||||||	|
          | _        dS )a
  
        Initialize the object for file processing with Azure Document Intelligence
        (formerly Form Recognizer).

        This constructor initializes a AzureAIDocumentIntelligenceParser object to be
        used for parsing files using the Azure Document Intelligence API. The load
        method generates Documents whose content representations are determined by the
        mode parameter.

        Parameters:
        -----------
        api_endpoint: str
            The API endpoint to use for DocumentIntelligenceClient construction.
        api_key: str
            The API key to use for DocumentIntelligenceClient construction.
        file_path : Optional[str]
            The path to the file that needs to be loaded.
            Either file_path, url_path or bytes_source must be specified.
        url_path : Optional[str]
            The URL to the file that needs to be loaded.
            Either file_path, url_path or bytes_source must be specified.
        bytes_source : Optional[bytes]
            The bytes array of the file that needs to be loaded.
            Either file_path, url_path or bytes_source must be specified.
        api_version: Optional[str]
            The API version for DocumentIntelligenceClient. Setting None to use
            the default value from `azure-ai-documentintelligence` package.
        api_model: str
            Unique document model name. Default value is "prebuilt-layout".
            Note that overriding this default value may result in unsupported
            behavior.
        mode: Optional[str]
            The type of content representation of the generated Documents.
            Use either "single", "page", or "markdown". Default value is "markdown".
        analysis_features: Optional[List[str]]
            List of optional analysis features, each feature should be passed
            as a str that conforms to the enum `DocumentAnalysisFeature` in
            `azure-ai-documentintelligence` package. Default value is None.
        azure_credential: Optional[TokenCredential]
            The credentials to use for DocumentIntelligenceClient construction, when
            using credentials other than api_key (like AD).

        Examples:
        ---------
        >>> obj = AzureAIDocumentIntelligenceLoader(
        ...     file_path="path/to/file",
        ...     api_endpoint="https://endpoint.azure.com",
        ...     api_key="APIKEY",
        ...     api_version="2023-10-31-preview",
        ...     api_model="prebuilt-layout",
        ...     mode="markdown"
        ... )
        Nz4file_path, url_path or bytes_source must be providedz4Either api_key or azure_credential must be provided.z;Only one of api_key or azure_credential should be provided.)r   r   r   r   r   r   r   )r   r   r   r   parser)selfr   r   r   r   r   r   r   r   r   r   s              r/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain_community/document_loaders/doc_intelligence.py__init__z*AzureAIDocumentIntelligenceLoader.__init__   s    J !X%9\=U=UA >V=U=U "&6&B&BB 'C&B&B "2":":I #;":": # (7%#/-
 
 
    Iterator[Document]c              #  `  K   | j         ;t          j        | j                   }| j                            |          E d{V  dS | j        '| j                            | j                  E d{V  dS | j        '| j                            | j                  E d{V  dS t          d          )z Lazy load the document as pages.NzNo data source provided.)
r   r
   	from_pathr#   parser   	parse_urlr   parse_bytes
ValueError)r$   blobs     r%   	lazy_loadz+AzureAIDocumentIntelligenceLoader.lazy_loadr   s       >%>$.11D{((...........]&{,,T];;;;;;;;;;;*{..t/@AAAAAAAAAAA7888r'   )NNNNNr   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   )r    r(   )__name__
__module____qualname____doc__r&   r0    r'   r%   r   r      sx        66
 "&#'"&(,%)*\
 268<\
 \
 \
 \
 \
 \
|9 9 9 9 9 9r'   r   N)
__future__r   typingr   r   r   r   langchain_core.documentsr   )langchain_community.document_loaders.baser	   1langchain_community.document_loaders.blob_loadersr
   ,langchain_community.document_loaders.parsersr   azure.core.credentialsr   r   r5   r'   r%   <module>r=      s    " " " " " " : : : : : : : : : : : : - - - - - - @ @ @ @ @ @ B B B B B B       7666666m9 m9 m9 m9 m9
 m9 m9 m9 m9 m9r'   