
    hhZ                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ  ej        e          Z G d de          ZdS )    N)AnyDictIteratorList)Document)	BaseModelmodel_validatorc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZ	e
ed<   d	Zeed
<   dZe
ed<   dZe
ed<   dZe
ed<   dZeed<   dZeed<    ed          ededefd                        ZdedefdZdedee         fdZdedee         fdZdedefdZdedee         fd Zdedee         fd!Zd"ed#edefd$Z d"ed%edefd&Z!d'S )(PubMedAPIWrappera`  
    Wrapper around PubMed API.

    This wrapper will use the PubMed API to conduct searches and fetch
    document summaries. By default, it will return the document summaries
    of the top-k results of an input search.

    Parameters:
        top_k_results: number of the top-scored document used for the PubMed tool
        MAX_QUERY_LENGTH: maximum length of the query.
          Default is 300 characters.
        doc_content_chars_max: maximum length of the document content.
          Content will be truncated if it exceeds this length.
          Default is 2000 characters.
        max_retry: maximum number of retries for a request. Default is 5.
        sleep_time: time to wait between retries.
          Default is 0.2 seconds.
        email: email address to be used for the PubMed API.
        api_key: API key to be used for the PubMed API.
    parsez;https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?base_url_esearchz:https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?base_url_efetch   	max_retryg?
sleep_time   top_k_resultsi,  MAX_QUERY_LENGTHi  doc_content_chars_maxzyour_email@example.comemail api_keybefore)modevaluesreturnc                 `    	 ddl }|j        |d<   n# t          $ r t          d          w xY w|S )z7Validate that the python package exists in environment.r   Nr   zZCould not import xmltodict python package. Please install it with `pip install xmltodict`.)	xmltodictr   ImportError)clsr   r   s      a/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain_community/utilities/pubmed.pyvalidate_environmentz%PubMedAPIWrapper.validate_environment5   sZ    	'oF7OO 	 	 	B  	
 s    +queryc                     	 d |                      |d| j                           D             }|r"d                    |          d| j                 ndS # t          $ r}d| cY d}~S d}~ww xY w)z
        Run PubMed search and get the article meta information.
        See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
        It uses only the most informative fields of article meta information.
        c                 Z    g | ](}d |d          d|d          d|d          d|d          )S )zPublished: 	Publishedz
Title: Titlez
Copyright Information: Copyright Informationz
Summary::
Summary ).0results     r!   
<listcomp>z(PubMedAPIWrapper.run.<locals>.<listcomp>M   ss       
 	2f[1 2 2 /2 2*01H*I2 2 %Y/2 2      Nz

zNo good PubMed Result was foundzPubMed exception: )loadr   joinr   	Exception)selfr#   docsexs       r!   runzPubMedAPIWrapper.runD   s    	- 
 #ii.E0E.E(FGG  D 7D!!">D$>">??6
  	- 	- 	-,,,,,,,,,	-s   AA 
A-A("A-(A-c              #     K   | j         dz   t          t          j                            |          h          z   d| j         dz   }| j        dk    r|d| j         z  }t          j                            |          }|	                                
                    d          }t          j        |          }|d         d         }|d         d	         D ]}|                     ||          V  d
S )z
        Search PubMed for documents matching the query.
        Return an iterator of dictionaries containing the document metadata.
        zdb=pubmed&term=z&retmode=json&retmax=z&usehistory=yr   	&api_key=utf-8esearchresultwebenvidlistN)r   strurllibr   quoter   r   requesturlopenreaddecodejsonloadsretrieve_article)r2   r#   urlr,   text	json_textr:   uids           r!   	lazy_loadzPubMedAPIWrapper.lazy_load^   s      ! 6<%%e,,-../ Hd&8GGGH 	 <2-t|---C'',,{{}}##G,,Jt$$	?+H5_-h7 	5 	5C''V444444	5 	5r.   c                 F    t          |                     |                    S )z
        Search PubMed for documents matching the query.
        Return a list of dictionaries containing the document metadata.
        )listrJ   r2   r#   s     r!   r/   zPubMedAPIWrapper.loadt   s    
 DNN5))***r.   docc                 N    |                     d          }t          ||          S )Nr)   )page_contentmetadata)popr   )r2   rN   summarys      r!   _dict2documentzPubMedAPIWrapper._dict2document{   s&    '')$$Ws;;;;r.   c              #   j   K   |                      |          D ]}|                     |          V  d S N)r#   )rJ   rT   )r2   r#   ds      r!   lazy_load_docszPubMedAPIWrapper.lazy_load_docs   sJ      e,, 	) 	)A%%a((((((	) 	)r.   c                 H    t          |                     |                    S rV   )rL   rX   rM   s     r!   	load_docszPubMedAPIWrapper.load_docs   s!    D''e'44555r.   rI   r:   c                 H   | j         dz   |z   dz   |z   }| j        dk    r|d| j         z  }d}	 	 t          j                            |          }n# t          j        j        $ rj}|j        dk    rS|| j        k     rHt          d| j
        d	d
           t          j        | j
                   | xj
        dz  c_
        |dz  }n|Y d }~nd }~ww xY w|                                                    d          }|                     |          }|                     ||          S )Nzdb=pubmed&retmode=xml&id=z&webenv=r   r7   r   Ti  zToo Many Requests, waiting for z.2fz seconds...      r8   )r   r   r=   r?   r@   error	HTTPErrorcoder   printr   timesleeprA   rB   r   _parse_article)	r2   rI   r:   rF   retryr,   exml_text	text_dicts	            r!   rE   z!PubMedAPIWrapper.retrieve_article   su    )*  	 	 <2-t|---C	//44<)   6S==UT^%;%; H'+GH H H   Jt///OOq(OOQJEEG EEEE		$ ;;==''00JJx((	""3	222s   A C$A C		Crh   c                 6   	 |d         d         d         d         }n$# t           $ r |d         d         d         }Y nw xY w|                    di                               dg           }d	 |D             }|rd
                    |          n^t          |t                    r|nGt          |t
                    r1d
                    d |                                D                       nd}|                    di           }d                    |                    dd          |                    dd          |                    dd          g          }||                    dd          ||                    di                               dd          |dS )NPubmedArticleSetPubmedArticleMedlineCitationArticlePubmedBookArticleBookDocumentAbstractAbstractTextc                 D    g | ]}d |v d|v 
|d          d|d           S )z#textz@Labelz: r*   )r+   txts     r!   r-   z3PubMedAPIWrapper._parse_article.<locals>.<listcomp>   sG     
 
 
#~~(c// 8}..G.."1//r.   
c              3   4   K   | ]}t          |          V  d S )N)r<   )r+   values     r!   	<genexpr>z2PubMedAPIWrapper._parse_article.<locals>.<genexpr>   s(      MMUc%jjMMMMMMr.   zNo abstract availableArticleDate-Yearr   MonthDayArticleTitleCopyrightInformation)rI   r'   r&   r(   r)   )KeyErrorgetr0   
isinstancer<   dictr   )	r2   rI   rh   arabstract_text	summariesrS   a_dpub_dates	            r!   rd   zPubMedAPIWrapper._parse_article   s   	T-.?@QRBB  	T 	T 	T-./BCNSBBB	Tz2..22>2FF
 
$
 
 
	 
DIIi    mS11 "-661DIIMMm6J6J6L6LMMMMMM0 	 ff]B''88##$$r""
 
 VVNB//!%'VVJ%;%;%?%?&& & 
 
 	
s    >>N)"__name__
__module____qualname____doc__r   __annotations__r   r<   r   r   intr   floatr   r   r   r   r   r	   classmethodr   r"   r5   r   r   rJ   r   r/   r   rT   rX   rZ   rE   rd   r*   r.   r!   r   r      sB         * JJJ 	F c    XOSWWWIsJ M3c!%3%%%)E3)))GS_(###$ 3    [ $#- - - - - -45s 5x~ 5 5 5 5,+# +$t* + + + +<$ <8 < < < <)C )HX,> ) ) ) )6s 6tH~ 6 6 6 6 3C  3  3  3  3  3  3D+
# +
$ +
4 +
 +
 +
 +
 +
 +
r.   r   )rC   loggingrb   urllib.errorr=   urllib.parseurllib.requesttypingr   r   r   r   langchain_core.documentsr   pydanticr   r	   	getLoggerr   loggerr   r*   r.   r!   <module>r      s                   , , , , , , , , , , , , - - - - - - / / / / / / / /		8	$	$D
 D
 D
 D
 D
y D
 D
 D
 D
 D
r.   