
    hh&                        d dl mZ d dlZd dlmZmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ erd dlmZ  ej        e          Z G d d	e          ZdS )
    )annotationsN)TYPE_CHECKINGAnyIteratorListOptional)Document)BaseBlobParser)Blob)TokenCredentialc                  P    e Zd ZdZ	 	 	 	 	 	 dd dZd!dZd!dZd"dZd#dZd$dZ	dS )%!AzureAIDocumentIntelligenceParserzMLoads a PDF with Azure Document Intelligence
    (formerly Forms Recognizer).Nprebuilt-layoutmarkdownapi_endpointstrapi_keyOptional[str]api_version	api_modelmodeanalysis_featuresOptional[List[str]]azure_credentialOptional['TokenCredential']c                0   ddl m} ddlm ddlm}	 i }
||t          d          |r|rt          d          |||
d<    |d||p
 |	|          dd	id
|
| _        || _        || _	        d | _
        |fd|D             | _
        | j	        dv sJ d S )Nr   )DocumentIntelligenceClient)DocumentAnalysisFeature)AzureKeyCredentialz4Either api_key or azure_credential must be provided.z;Only one of api_key or azure_credential should be provided.r   zx-ms-useragentzlangchain-parser/1.0.0)endpoint
credentialheadersc                &    g | ]} |          S  r$   ).0featurer   s     z/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain_community/document_loaders/parsers/doc_intelligence.py
<listcomp>z>AzureAIDocumentIntelligenceParser.__init__.<locals>.<listcomp>:   s2       5<''00      )singlepager   r$   )azure.ai.documentintelligencer   $azure.ai.documentintelligence.modelsr   azure.core.credentialsr   
ValueErrorclientr   r   features)selfr   r   r   r   r   r   r   r   r   kwargsr   s              @r'   __init__z*AzureAIDocumentIntelligenceParser.__init__   s=    	MLLLLLPPPPPP======?/7STTT 	' 	M   "$/F=!00 
!'F+=+=g+F+F%'?@
 
 	
 
 #	AE(   @Q  DM y:::::::r)   resultr   returnIterator[Document]c              #     K   |j         D ]B}d                    d |j        D                       }t          |d|j        i          }|V  Cd S )N c                    g | ]	}|j         
S r$   )content)r%   lines     r'   r(   zIAzureAIDocumentIntelligenceParser._generate_docs_page.<locals>.<listcomp>A   s    AAAAAAr)   r+   page_contentmetadata)pagesjoinlinesr	   page_number)r2   r5   pr;   ds        r'   _generate_docs_pagez5AzureAIDocumentIntelligenceParser._generate_docs_page?   ss       		 		AhhAAAAABBG$AM  A GGGG		 		r)   c              #  ^   K   t          |j        |                                          V  d S )Nr=   )r	   r;   as_dict)r2   r5   s     r'   _generate_docs_singlez7AzureAIDocumentIntelligenceParser._generate_docs_singleK   s1      FNV^^=M=MNNNNNNNNr)   blobr   c              #    K   |                                 5 }| j                            | j        |d| j        dk    rdnd| j                  }|                                }| j        dv r|                     |          E d{V  n<| j        dv r|                     |          E d{V  nt          d| j                   ddd           dS # 1 swxY w Y   dS )	zLazily parse the blob.zapplication/octet-streamr   text)bodycontent_typeoutput_content_formatr1   r*   r   Nr+   Invalid mode: )
as_bytes_ior0   begin_analyze_documentr   r   r1   r5   rI   rF   r/   )r2   rJ   file_objpollerr5   s        r'   
lazy_parsez,AzureAIDocumentIntelligenceParser.lazy_parseN   sP       	?8[77748I4K4KjjQW 8  F ]]__Fy22255f==========h&&33F;;;;;;;;;; !=$)!=!=>>>	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	?s   B,CCCurlc              #  ~  K   ddl m} | j                            | j         ||          | j        dk    rdnd| j                  }|                                }| j        dv r|                     |          E d {V  d S | j        dv r| 	                    |          E d {V  d S t          d	| j                   )
Nr   AnalyzeDocumentRequest)
url_sourcer   rL   rM   rO   r1   rP   rQ   rR   r-   r[   r0   rT   r   r   r1   r5   rI   rF   r/   )r2   rX   r[   rV   r5   s        r'   	parse_urlz+AzureAIDocumentIntelligenceParser.parse_urlb   s      OOOOOO33N''377704	Z0G0G**V]	 4 
 
 9...11&99999999999Y(""//777777777779di99:::r)   bytes_sourcebytesc              #  ~  K   ddl m} | j                            | j         ||          | j        dk    rdnd| j                  }|                                }| j        dv r|                     |          E d {V  d S | j        dv r| 	                    |          E d {V  d S t          d	| j                   )
Nr   rZ   )r`   r   rL   r]   rP   rQ   rR   r^   )r2   r`   r[   rV   r5   s        r'   parse_bytesz-AzureAIDocumentIntelligenceParser.parse_bytest   s      OOOOOO33N''\BBB04	Z0G0G**V]	 4 
 
 9...11&99999999999Y(""//777777777779di99:::r)   )NNr   r   NN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r5   r   r6   r7   )rJ   r   r6   r7   )rX   r   r6   r7   )r`   ra   r6   r7   )
__name__
__module____qualname____doc__r4   rF   rI   rW   r_   rc   r$   r)   r'   r   r      s        $ $ "&%)*158<(; (; (; (; (;T
 
 
 
O O O O? ? ? ?(; ; ; ;$; ; ; ; ; ;r)   r   )
__future__r   loggingtypingr   r   r   r   r   langchain_core.documentsr	   )langchain_community.document_loaders.baser
   1langchain_community.document_loaders.blob_loadersr   r.   r   	getLoggerrd   loggerr   r$   r)   r'   <module>rp      s    " " " " " "  ? ? ? ? ? ? ? ? ? ? ? ? ? ? - - - - - - D D D D D D B B B B B B 7666666		8	$	$s; s; s; s; s; s; s; s; s; s;r)   