
    "#h                         d dl mZ d dl mZmZmZ d dlZd dlZd dlZd dlZ ej                  dej                        Zd Zd Zd Zd Zd	 Zy)
    )absolute_import)divisionprint_functionunicode_literalsNz\s+c                 6    t         j                  t        |       S )z
    Translates multiple whitespace into single space character.
    If there is at least one new line character chunk is replaced
    by single LF (Unix new line) character.
    )MULTIPLE_WHITESPACE_PATTERNsub_replace_whitespace)texts    J/var/www/html/sandstorm/venv/lib/python3.12/site-packages/justext/utils.pynormalize_whitespacer      s     '**+>EE    c                 :    | j                         }d|v sd|v rdS dS )zBNormalize all spacing characters that aren't a newline to a space.
 )group)matchr   s     r   r
   r
      s$    ;;=D4<44<48S8r   c                 ,    |  xs | j                         S )zw
    Returns `True` if string contains only white-space characters
    or is empty. Otherwise `False` is returned.
    )isspace)strings    r   is_blankr      s    
 :)))r   c                  v   t         j                  j                  t        j                  d   j
                        } t         j                  j                  | d      } g }t        j                  |       D ];  }t         j                  j                  |      \  }}|dk(  s+|j                  |       = t        |      S )z,Returns a collection of built-in stop-lists.justext	stoplistsz.txt)ospathdirnamesysmodules__file__joinlistdirsplitextappend	frozenset)path_to_stoplistsstoplist_namesfilenamename	extensions        r   get_stoplistsr,   %   s    I(>(G(GH%6DNJJ01 (''**84i!!$'(
 ^$$r   c                     t         j                  j                  dd| z        }	 t        j                  d|      }t        d |j                         D              S # t
        $ r t        d| z        w xY w)zAReturns an built-in stop-list for the language as a set of words.r   z%s.txtr   zStoplist for language '%s' is missing. Please use function 'get_stoplists' for complete list of stoplists and feel free to contribute by your own stoplist.c              3   Z   K   | ]#  }|j                  d       j                          % yw)utf8N)decodelower).0ws     r   	<genexpr>zget_stoplist.<locals>.<genexpr>?   s"     N!QXXf%++-Ns   )+)	r   r   r"   pkgutilget_dataIOError
ValueErrorr&   
splitlines)language	file_path	stopwordss      r   get_stoplistr=   3   s{    [(X*=>I
$$Y	:	 Ny7K7K7MNNN  
@BJK
 	

s   A A3)
__future__r   r   r   r   rer   r   r5   compileUNICODEr   r   r
   r   r,   r=    r   r   <module>rC      sN    ' A A 	 	 
 (bjj< F9*%Or   