
    :Qgw
                        U d dl mZ d dlZd dlmZ d dlmZmZmZ d dl	Z	d dl	m
Z d dl	mZ d dl	mZ dZd	ed
<   d ZddZ e       d        Z ee      dd       Z ee      dd       Z ee      dd       Z
y)    )annotationsN)	lru_cache)FinalListTuple)pos_tag)sent_tokenize)word_tokenize   z
Final[int]CACHE_MAX_SIZEc                 `    t        j                  dd       t        j                  dd       y )Naveraged_perceptron_tagger_engT)quiet	punkt_tab)nltkdownload     V/var/www/html/answerous/venv/lib/python3.12/site-packages/unstructured/nlp/tokenize.pydownload_nltk_packagesr      s    MM2$?MM+T*r   c                .   g }t         j                  j                  D ]D  }|j                  d      s t        j                  j                  |d      }|j                  |       F 	 t        j                  | d|  |       y# t        t        f$ r Y yw xY w)zEChecks to see if the specified NLTK package exists on the file system	nltk_data/)pathsTF)
r   datapathendswithosjoinappendfindLookupErrorOSError)package_namepackage_categoryr   r   s       r   check_for_nltk_packager&      s    E		 }}[)77<<k2DT
		%&a~6eD! s   %B BBc                 V    t        dd      } t        dd      }|r| st                yy)z;If required NLTK packages are not available, download them.taggersr   )r%   r$   
tokenizersr   N)r&   r   )tagger_availabletokenizer_availables     r   &_download_nltk_packages_if_not_presentr,   %   s:     ."5 1%K  )9  *:r   )maxsizec                ,    t                t        |       S )zFA wrapper around the NLTK sentence tokenizer with LRU caching enabled.)r,   _sent_tokenizetexts    r   r	   r	   5        +,$r   c                ,    t                t        |       S )zBA wrapper around the NLTK word tokenizer with LRU caching enabled.)r,   _word_tokenizer0   s    r   r
   r
   <   r2   r   c                    t                t        |       }g }|D ]'  }t        |      }|j                  t	        |             ) |S )z>A wrapper around the NLTK POS tagger with LRU caching enabled.)r,   r/   r4   extend_pos_tag)r1   	sentencesparts_of_speechsentencetokenss        r   r   r   C   sO     +, t$I-/O 1)x/01 r   )r$   strr%   r<   returnbool)r1   r<   r=   z	List[str])r1   r<   r=   zList[Tuple[str, str]])
__future__r   r   	functoolsr   typingr   r   r   r   r   r7   r	   r/   r
   r4   r   __annotations__r   r&   r,   r   r   r   <module>rC      s    " 	  % %  $ 0 0 
  +
" ! ! >"  #  >"  #  >" #r   