
    "#h                     r    d dl mZ d dl mZmZmZ d dlZddlmZ  ej                  d      Z	 G d de
      Zy)	    )absolute_import)divisionprint_functionunicode_literalsN   )normalize_whitespacez\bh\d\bc                   z    e Zd ZdZd Zed        Zed        Zed        Zd Z	ed        Z
d Zd	 Zd
 Zd Zd Zy)	Paragraphz.Object representing one block of text in HTML.c                     |j                   | _        |j                  | _        g | _        d| _        d| _        d| _        y )Nr    )domdom_pathxpath
text_nodeschars_count_in_links
tags_count
class_type)selfpaths     N/var/www/html/sandstorm/venv/lib/python3.12/site-packages/justext/paragraph.py__init__zParagraph.__init__   s5    ZZ
$%!    c                 R    t        t        j                  | j                              S N)boolHEADINGS_PATTERNsearchr   r   s    r   
is_headingzParagraph.is_heading   s    $++DMM:;;r   c                      | j                   dk7  S )Ngood)r   r   s    r   is_boilerplatezParagraph.is_boilerplate   s    &((r   c                 j    dj                  | j                        }t        |j                               S )Nr   )joinr   r   stripr   texts     r   r'   zParagraph.text    s%    wwt'#DJJL11r   c                 ,    t        | j                        S r   )lenr'   r   s    r   __len__zParagraph.__len__%   s    499~r   c                 H    t        | j                  j                               S r   )r)   r'   splitr   s    r   words_countzParagraph.words_count(   s    499??$%%r   c                 ,    t        | j                        S r   )r   r   r   s    r   contains_textzParagraph.contains_text,   s    DOO$$r   c                 R    t        |      }| j                  j                  |       |S r   )r   r   appendr&   s     r   append_textzParagraph.append_text/   s#    #D)t$r   c                 \    t        fd| j                  j                         D              S )Nc              3   B   K   | ]  }|j                         v   y wr   )lower).0word	stopwordss     r   	<genexpr>z,Paragraph.stopwords_count.<locals>.<genexpr>5   s     K4::<9,Ks   )sumr'   r,   r   r8   s    `r   stopwords_countzParagraph.stopwords_count4   s    K9JKKKr   c                 ^    | j                   dk(  ry| j                  |      | j                   z  S Nr   )r-   r<   r;   s     r   stopwords_densityzParagraph.stopwords_density7   s/    q ##I.1A1AAAr   c                 V    t        | j                        }|dk(  ry| j                  |z  S r>   )r)   r'   r   )r   text_lengths     r   links_densityzParagraph.links_density=   s+    $))n!((;66r   N)__name__
__module____qualname____doc__r   propertyr   r"   r'   r*   r-   r/   r2   r<   r?   rB    r   r   r
   r
      sz    8 < < ) ) 2 2 & &%
LB7r   r
   )
__future__r   r   r   r   reutilsr   compiler   objectr
   rH   r   r   <module>rN      s5    ' A A 	 ' 2::j) 47 47r   