
    :Qg                         d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ  ej                  e eed       g      d        Zd	 Zd
 Zd Zy)    )partialN)chunk_elements)chunk_by_title)ElementMetadataNarrativeTextTextTitle)combine_text_under_n_chars)paramsc                     | j                   S )N)param)requests    h/var/www/html/answerous/venv/lib/python3.12/site-packages/test_unstructured/chunking/test_html_output.pychunking_fnr   
   s    ==    c                 ,   d}d}d}dj                  |||g      }t        dt        |            t        dt        |            t        d	t        |            g} | |      }t	        |      d
k(  sJ |d   j
                  j                  |k(  sJ y )N%<h1 class="Title" id="1">Header </h1>z@<time class="CalendarDate" id="2">Date: October 30, 2023 </time>z<form class="Form" id="3"> <label class="FormField" for="company-name" id="4">Form field name </label><input class="FormFieldValue" id="5" value="Example value" /></form> Headertext_as_htmltextmetadatazDate: October 30, 2023zForm field name Example value   r   )joinr	   r   r   lenr   r   )r   
metadata_1
metadata_2
metadata_3combined_metadataelementschunkss          r   Htest_combining_html_metadata_when_multiple_elements_in_composite_elementr$      s    8JSJ	  *j*!EF 	8o:&NO*_R\5]^0?Xb;c	
H "Fv;!!9**.????r   c           	         d}d}d}t        dt        |            t        dt        |d	            t        d
t        |d	            g} | |d      }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  d
k(  sJ |d   j
                  j                  |dz   |z   k(  sJ |d   j
                  j                  |k(  sJ y)aH  
    Ground truth
    <Document>
        <Page>
            <Section>
                <p>First</p>
                <p>Second</p>
            </Section>
        </Page>
    </Document>
    Elements: Document, Page, Section, Paragraph, Paragraph
    Chunk 1: Document, Page, Section, Paragraph

    Chunk 2:
        Paragraph
    z<div class="Section" id="1" />z&<p class="Paragraph" id="2">First </p>z'<p class="Paragraph" id="3">Second </p> r   r   First1)r   	parent_idSecond   max_characters   r   r   r   N)r   r   r   r   r   r   r   )r   r   r   r    r"   r#   s         r   Ftest_combining_html_metadata_with_nested_relationship_between_elementsr/   &   s    $ 2J9J:J 	"JGH?
VY#Z	
 	OWZ$[	
H !4Fv;!!9>>W$$$!9>>X%%%!9**j3.>.KKKK!9**j888r   c                 4   d}t        dt        |            g} | |d      }t        |      dk(  sJ |d   j                  d	k(  sJ |d
   j                  dk(  sJ |d   j                  j
                  dk(  sJ |d
   j                  j
                  dk(  sJ y)z2Mimic behaviour of elements with non-html metadatar   r   r   r      r,   r.   r   Hear   derN)r	   r   r   r   r   r   )r   r   r"   r#   s       r   ;test_html_metadata_exist_in_both_element_when_text_is_splitr4   N   s    8J8o:&NOH !4Fv;!!9>>U"""!9>>U"""!9**.UUUU!9**.UUUUr   )	functoolsr   pytestunstructured.chunking.basicr   unstructured.chunking.titler   unstructured.documents.elementsr   r   r   r	   fixturer   r$   r/   r4    r   r   <module>r<      sV      6 6 W W [\(]^_ `@.%9PVr   