
    hh                     R    d dl mZ d dlmZ d dlmZ d dlmZ  G d de          ZdS )    )Iterator)Document)BaseBlobParser)Blobc                   .    e Zd ZdZdedee         fdZdS )MsWordParserz/Parse the Microsoft Word documents from a blob.blobreturnc              #     K   	 ddl m} ddlm} n"# t          $ r}t	          d          |d}~ww xY w||d}|j        dvrt          d          |                                5 } ||j                 |          }d	                    d
 |D                       }d|j	        i}	t          ||	          V  ddd           dS # 1 swxY w Y   dS )zParse a Microsoft Word document into the Document iterator.

        Args:
            blob: The blob to parse.

        Returns: An iterator of Documents.

        r   )partition_doc)partition_docxzNCould not import unstructured, please install with `pip install unstructured`.N)zapplication/mswordzGapplication/vnd.openxmlformats-officedocument.wordprocessingml.documentz0This blob type is not supported for this parser.)filez

c                 ,    g | ]}t          |          S  )str).0els     p/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain_community/document_loaders/parsers/msword.py
<listcomp>z+MsWordParser.lazy_parse.<locals>.<listcomp>+   s    ;;;BB;;;    source)page_contentmetadata)unstructured.partition.docr   unstructured.partition.docxr   ImportErrormimetype
ValueErroras_bytes_iojoinr   r   )
selfr	   r   r   emime_type_parserword_documentelementstextr   s
             r   
lazy_parsezMsWordParser.lazy_parse   s     	@@@@@@BBBBBBB 	 	 	!  	 #0
 
 = !
 
 
 OPPP 	A=6'6MJJJH;;;;(;;;<<D $+.Hx@@@@@@		A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	As#    
0+0$ACCCN)__name__
__module____qualname____doc__r   r   r   r'   r   r   r   r   r   	   sI        99!At !A(: !A !A !A !A !A !Ar   r   N)	typingr   langchain_core.documentsr   )langchain_community.document_loaders.baser   1langchain_community.document_loaders.blob_loadersr   r   r   r   r   <module>r0      s          - - - - - - D D D D D D B B B B B B$A $A $A $A $A> $A $A $A $A $Ar   