
    hh                     J    d dl Z d dlmZmZmZ d dlmZ  G d de          ZdS )    N)CallableListPattern)CodeSegmenterc                   <    e Zd ZU dZ ej        dej                  Zee	d<    ej        dej                  Z
ee	d<    ej        dej                  Zee	d<   def fd	Zd
efdZdeded
efdZded
efdZded
ee         fdZd
ee         fdZd
efdZ xZS )CobolSegmenterzCode segmenter for `COBOL`.z^[A-Z0-9\-]+(\s+.*)?\.$PARAGRAPH_PATTERNz=^\s*(IDENTIFICATION|DATA|PROCEDURE|ENVIRONMENT)\s+DIVISION.*$DIVISION_PATTERNz^\s*[A-Z0-9\-]+\s+SECTION.$SECTION_PATTERNcodec                     t                                          |           | j                                        | _        d S N)super__init__r   
splitlinessource_lines)selfr   	__class__s     x/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain_community/document_loaders/parsers/language/cobol.pyr   zCobolSegmenter.__init__   s7    '+y';';'='=    returnc                 D     t           fd j        D                       S )Nc              3   L   K   | ]}j                             |          V  d S r   )r
   match).0liner   s     r   	<genexpr>z*CobolSegmenter.is_valid.<locals>.<genexpr>   s4      SS4(..t44SSSSSSr   )anyr   )r   s   `r   is_validzCobolSegmenter.is_valid   s)    SSSSARSSSSSSr   	start_idxend_idxc                 l    d                     | j        ||                                       d          S )N
)joinr   rstrip)r   r    r!   s      r   _extract_codezCobolSegmenter._extract_code   s/    yy*9W+<=>>EEdKKKr   r   c                 6    d|                                 v rdS dS )zHCheck if a line is part of the procedure division or a relevant section.PROCEDURE DIVISIONTF)upper)r   r   s     r   _is_relevant_codez CobolSegmenter._is_relevant_code   s    4::<<//4ur   funcc                    g }d}d}t          | j                  D ]\  }}|                     |          rd}|r| j                            |                                                    d          d                   s,| j                            |                                          r| ||||           |}| |||t          | j                             |S )zAA generic function to process COBOL lines based on provided func.NFT r   )		enumerater   r*   r	   r   stripsplitr   len)r   r+   elementsr    inside_relevant_sectionir   s          r   _process_lineszCobolSegmenter._process_lines$   s     	"' !233 
	 
	GAt%%d++ /*.'& &,,TZZ\\-?-?-D-DQ-GHH'--djjll;; (D9a000	  D9c$*;&<&<===r   c                 ~     dt           t                   dt          dt          dd f fd}                     |          S )Nr2   r    r!   r   c                 Z    |                                          ||                     d S r   )appendr&   )r2   r    r!   r   s      r   extract_funcz>CobolSegmenter.extract_functions_classes.<locals>.extract_func=   s+    OOD..y'BBCCCCCr   )r   strintr5   )r   r9   s   ` r   extract_functions_classesz(CobolSegmenter.extract_functions_classes<   sa    	D49 	D 	Ds 	Dt 	D 	D 	D 	D 	D 	D ""<000r   c                    g }d}d}| j         D ]}d|v pxd|v ptd|v pp| j                            |                                                    d          d                   p+| j                            |                                          }|rd}d}|r1|r|                    |           |s|                    d           d}d	                    |          S )
NFr(   zDATA DIVISIONzIDENTIFICATION DIVISIONr-   r   Tz* OMITTED CODE *r#   )r   r	   r   r/   r0   r   r8   r$   )r   simplified_linesr3   omitted_code_addedr   	is_headers         r   simplify_codezCobolSegmenter.simplify_codeB   s"   &("' 	 % 	. 	.D$, <"d*<,4< )//

0B0B30G0G0JKK< '--djjll;;   +*.' &+"& . .$++D1111+ . %++,>???)-&yy)***r   )__name__
__module____qualname____doc__recompile
IGNORECASEr	   r   __annotations__r
   r   r:   r   boolr   r;   r&   r*   r   r   r5   r<   rA   __classcell__)r   s   @r   r   r   	   s        %%!+,F!V!VwVVV *
H"-! !g     *rz*H"-XXOWXXX>S > > > > > >T$ T T T TLs LS LS L L L Lc d    8 S	    0149 1 1 1 1 +s  +  +  +  +  +  +  +  +r   r   )rF   typingr   r   r   Dlangchain_community.document_loaders.parsers.language.code_segmenterr   r    r   r   <module>rO      s    				 * * * * * * * * * *     
Y+ Y+ Y+ Y+ Y+] Y+ Y+ Y+ Y+ Y+r   