
    :QgE                    4   U d Z ddlmZ ddlZddlZddlZddlmZm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ  ed
      Z G d de      ZddZddZddZ ej:                  d       G d d             Z ee       ee      dZde d<   y)zHandles dispatch of elements to a chunking-strategy by name.

Also provides the `@add_chunking_strategy` decorator which is the chief current user of "by-name"
chunking dispatch.
    )annotationsN)AnyCallableIterableOptionalProtocol)	ParamSpec)chunk_elements)chunk_by_title)Element)get_call_args_applying_defaultslazyproperty_Pc                  $    e Zd ZdZ	 	 	 	 	 	 ddZy)Chunkerz*Abstract interface for chunking functions.c                    y)a  A chunking function must have this signature.

        In particular it must minimally have an `elements` parameter and all chunkers will have a
        `max_characters` parameter (doesn't need to follow `elements` directly). All others can
        vary by chunker.
        N )selfelementsmax_characterss      [/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/chunking/dispatch.py__call__zChunker.__call__   s     	    N)r   Iterable[Element]r   zOptional[int]returnlist[Element])__name__
__module____qualname____doc__r   r   r   r   r   r      s"    4	)	>K			r   r   c                      j                   r;d j                  j                  v r#d j                   vr xj                   dz  c_         t        j                         d fd       }|S )zDecorator for chunking text.

    Chunks the element sequence produced by the partitioner it decorates when a `chunking_strategy`
    argument is present in the partitioner call and it names an available chunking strategy.
    chunking_strategya  
chunking_strategy
	Strategy used for chunking text into larger or smaller elements.
	Defaults to `None` with optional arg of 'basic' or 'by_title'.
	Additional Parameters:
		multipage_sections
			If True, sections can span multiple pages. Defaults to True.
		combine_text_under_n_chars
			Combines elements (for example a series of titles) until a section
			reaches a length of n characters. Only applies to 'by_title' strategy.
		new_after_n_chars
			Cuts off chunks once they reach a length of n characters; a soft max.
		max_characters
			Chunks elements text and text_as_html (if present) into chunks
			of length n characters, a hard max.c                 x     | i |}t        g| i |}|j                  dd      }||S t        ||fi |S )z1The decorated function is replaced with this one.r"   N)r   popchunk)argskwargsr   	call_argsr"   funcs        r   wrapperz&add_chunking_strategy.<locals>.wrapperE   s^    
 (( 4DJ4J6J	%MM*=tD $O X0>I>>r   )r&   z_P.argsr'   z	_P.kwargsr   r   )r    __code__co_varnames	functoolswraps)r)   r*   s   ` r   add_chunking_strategyr/   '   sd     ||t}}888=PX\XdXd=d<	
" __T? ?" Nr   c                    t         j                  |      }|t        dt        |             |j	                         D ci c]  \  }}||j
                  v s|| }}} |j                  | fi |S c c}}w )zQDispatch chunking of `elements` to the chunking function for `chunking_strategy`.zunrecognized chunking strategy )_chunker_registryget
ValueErrorrepritemskw_arg_nameschunker)r   r"   r'   chunker_speckvchunking_kwargss          r   r%   r%   Z   s    $(():;L:4@Q;R:STUU )/Y1!|?X?X:Xq!tYOY<<O<< Zs   A4A4c                (    t        |      t        | <   y)zVMake chunker available by using `name` as `chunking_strategy` arg in partitioner call.N)_ChunkerSpecr1   )namer7   s     r   register_chunking_strategyr?   h   s    *73dr   T)frozenc                  0    e Zd ZU dZded<   	 edd       Zy)r=   zA registry entry for a chunker.r   r7   c                x    t        j                  | j                        }t        d |j                  D              S )zKeyword arguments supported by this chunker.

        These are all arguments other than the required `elements: list[Element]` first parameter.
        c              3  ,   K   | ]  }|d k7  s	|  yw)r   Nr   ).0keys     r   	<genexpr>z,_ChunkerSpec.kw_arg_names.<locals>.<genexpr>{   s     HScZ6GSHs   
)inspect	signaturer7   tuple
parameters)r   sigs     r   r6   z_ChunkerSpec.kw_arg_namest   s-     -HCNNHHHr   N)r   ztuple[str, ...])r   r   r   r    __annotations__r   r6   r   r   r   r=   r=   m   s"    )NI Ir   r=   )basicby_titlezdict[str, _ChunkerSpec]r1   )r)   Callable[_P, list[Element]]r   rO   )r   r   r"   strr'   r   r   r   )r>   rP   r7   r   r   None)!r    
__future__r   dataclassesdcr-   rG   typingr   r   r   r   r   typing_extensionsr	   unstructured.chunking.basicr
   unstructured.chunking.titler   unstructured.documents.elementsr   unstructured.utilsr   r   r   r   r/   r%   r?   	dataclassr=   r1   rL   r   r   r   <module>r\      s    #    > > ' 6 6 3 Lt_h 0f=4
 TI I I" .)^,. * r   