
    :Qg!                     v    d dl Z d dlZd dlZd dlmZmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZ  G d d	e      Zy)
    N)zipxrange   )	ErrorCodeLangDetectException)Language)NGram)unicode_blockc                       e Zd ZdZdZdZdZdZdZdZ	dZ
 ej                  d	      Z ej                  d
      Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)Detectoraj  
    Detector class is to detect language from specified text.
    Its instance is able to be constructed via the factory class DetectorFactory.

    After appending a target text to the Detector instance with .append(string),
    the detector provides the language detection results for target text via .detect() or .get_probabilities().

    .detect() method returns a single language name which has the highest probability.
    .get_probabilities() methods returns a list of multiple languages and their probabilities.

    The detector has some parameters for language detection.
    See set_alpha(double), .set_max_text_length(int) .set_prior_map(dict).

    Example:

        from langdetect.detector_factory import DetectorFactory
        factory = DetectorFactory()
        factory.load_profile('/path/to/profile/directory')

        def detect(text):
            detector = factory.create()
            detector.append(text)
            return detector.detect()

        def detect_langs(text):
            detector = factory.create()
            detector.append(text)
            return detector.get_probabilities()
    g      ?g?i  g?gwJ?'  unknownz'https?://[-_.?&~;+=/#0-9A-Za-z]{1,2076}z>[-_.0-9A-Za-z]{1,64}@[-_0-9A-Za-z]{1,255}[-_.0-9A-Za-z]{1,255}c                    |j                   | _         |j                  | _        |j                  | _        t        j                         | _        d| _        d | _        | j                  | _        d| _	        d| _
        d | _        d| _        y )N    r   F)word_lang_prob_maplanglistseedrandomRandomtextlangprobALPHA_DEFAULTalphan_trialmax_text_length	prior_mapverbose)selffactorys     P/var/www/html/answerous/venv/lib/python3.12/site-packages/langdetect/detector.py__init__zDetector.__init__8   sm    ")"<"<((LL	mmo	''
$    c                     d| _         y )NT)r   r   s    r!   set_verbosezDetector.set_verboseF   s	    r#   c                     || _         y N)r   )r   r   s     r!   	set_alphazDetector.set_alphaI   s	    
r#   c                    dgt        | j                        z  | _        d}t        t        | j                              D ]N  }| j                  |   }||v s||   }|dk  rt	        t
        j                  d      || j                  |<   ||z  }P |dk  rt	        t
        j                  d      t        t        | j                              D ]  }| j                  |xx   |z  cc<    y)z3Set prior information about language probabilities.        r   z'Prior probability must be non-negative.z/More one of prior probability must be non-zero.N)lenr   r   r   r   r   InitParamError)r   r   sumpilangps         r!   set_prior_mapzDetector.set_prior_mapL   s    T]]!33DNN+, 	A==#Dy dOq5-i.F.FHqrr$%q!		 3;%i&>&>@qrrDNN+, 	&ANN1%	&r#   c                     || _         y)zqSpecify max size of target text to use for language detection.
        The default value is 10000(10KB).
        N)r   )r   r   s     r!   set_max_text_lengthzDetector.set_max_text_length]   s      /r#   c                 H   | j                   j                  d|      }| j                  j                  d|      }t        j                  |      }d}t        t        t        |      | j                              D ](  }||   }|dk7  s|dk7  r| xj                  |z  c_	        |}* y)zAppend the target text for language detection.
        If the total size of target text exceeds the limit size specified by
        Detector.set_max_text_length(int), the rest is cut down.
         r   N)
URL_REsubMAIL_REr	   normalize_vir   minr,   r   r   )r   r   prer/   chs        r!   appendzDetector.appendc   s    
 {{sD)||T*!!$'CIt';';<= 	AaBSyC3J		R	C		r#   c                    d\  }}| j                   D ]C  }d|cxk  rdk  r	n n|dz  }|t        j                  d      k\  s0t        |      dk7  s?|dz  }E |dz  |k  r+d}| j                   D ]  }|dk  sd|k  s||z  } || _         y	y	)
zCleaning text to detect
        (eliminate URL, e-mail address and Latin sentence if it is not written in Latin alphabet).
        )r   r   Azr   u   ̀zLatin Extended Additional   r   N)r   sixur
   )r   latin_countnon_latin_countr=   text_without_latins        r!   cleaning_textzDetector.cleaning_textr   s     (,$_)) 	%BbCq suuX&=+<@[+[1$		% ?_,!#ii -8sRx&",&- +DI -r#   c                 \    | j                         }|r|d   j                  S | j                  S )zsDetect language of the target text and return the language name
        which has the highest probability.
        r   )get_probabilitiesr0   UNKNOWN_LANG)r   probabilitiess     r!   detectzDetector.detect   s2     ..0 #(((   r#   c                 p    | j                   | j                          | j                  | j                         S r(   )r   _detect_block_sort_probabilityr%   s    r!   rJ   zDetector.get_probabilities   s-    ==  %%dmm44r#   c                    | j                          | j                         }|st        t        j                  d      dgt        | j                        z  | _        | j                  j                  | j                         t        | j                        D ]b  }| j                         }| j                  | j                  j                  dd      | j                  z  z   }d}	 | j!                  || j                  j#                  |      |       |dz  dk(  r_| j%                  |      | j&                  kD  s|| j(                  k\  rn7| j*                  r%t-        j.                  d| j1                  |             |dz  }t        t        | j                              D ])  }| j                  |xx   ||   | j                  z  z  cc<   + | j*                  s>t-        j.                  d| j1                  |             e y )	NzNo features in text.r+         ?r      >r   z==>)rH   _extract_ngramsr   r   CantDetectErrorr,   r   r   r   r   r   r   _init_probabilityr   gaussALPHA_WIDTH_update_lang_probchoice_normalize_probCONV_THRESHOLDITERATION_LIMITr   rC   print_rP   )r   ngramstprobr   r/   js          r!   rO   zDetector._detect_block   s   %%'%i&?&?AWXXDMM 22#% 	@A))+DJJ!2!23!<t?O?O!OOEA&&tT[[-?-?-GOq5A:++D1D4G4GG1PTPdPdKd||

3(>(>t(DEQ  C./ ;a DGdll$:: ;||

5$"8"8">?!	@r#   c                     | j                   t        | j                         S dt        | j                        z  gt        | j                        z  S )zzInitialize the map of language probabilities.
        If there is the specified prior map, use it as initial map.
        rR   )r   listr,   r   r%   s    r!   rW   zDetector._init_probability   sB     >>%''#dmm,,-DMM0BBBr#   c                 |   t        t        dt        j                  dz               }g }t               }| j                  D ]z  }|j                  |       |j                  r!|D ]U  }t        |j                        |k  r @|j                  | d }|s0|dk7  s6|| j                  v sE|j                  |       W | |S )z!Extract n-grams from target text.r   Nr6   )re   r   r	   N_GRAMr   add_charcapitalwordr,   gramsr   r>   )r   RANGEresultngramr=   nws          r!   rU   zDetector._extract_ngrams   s    VAu||a/01)) 
	%BNN2   %u{{#a'KK$ca4+B+B&BMM!$%	
	% r#   c           	      H   ||| j                   vry| j                   |   }| j                  r;t        j                  |d| j	                  |      d| j                  |             || j                  z  }t        t        |            D ]  }||xx   |||   z   z  cc<    y)z:Update language probabilities with N-gram string(N=1,2,3).F(z): T)	r   r   rC   r_   _unicode_encode_word_prob_to_string	BASE_FREQr   r,   )r   rb   wordr   lang_prob_mapweightr/   s          r!   rZ   zDetector._update_lang_prob   s    <4t'>'>>//5<<JJtT-A-A$-GIbIbcpIqrs'D	" 	1AGva 000G	1r#   c                     d}t        t        |            D ]$  }||   }|dk\  s|d| j                  |   |fz  z  }& |S )Nr   gh㈵>z %s:%.5f)r   r,   r   )r   rb   rl   rc   r1   s        r!   rs   zDetector._word_prob_to_string   sS    D	" 	=AQAG|*a(8!'<<<	= r#   c                 z    dt        |      }}t        t        |            D ]  }||   |z  }||k  r|}|||<    |S )zRNormalize probabilities and check convergence by the maximun probability.
        r+   )sumr   r,   )r   rb   maxpr.   r/   r1   s         r!   r\   zDetector._normalize_prob   sR     #d)dD	" 	AQ$AaxDG		
 r#   c                     t        | j                  |      D cg c]!  \  }}|| j                  kD  st        ||      # }}}|j	                  d       |S c c}}w )NT)reverse)r   r   PROB_THRESHOLDr   sort)r   rb   r0   r1   rl   s        r!   rP   zDetector._sort_probability   sS    585Mi	qQRUYUhUhQh(4#iiD! js
   AAc                     d}|D ]g  }|t        j                  d      k\  rHt        dt        |      z         dd  }t	        |      dk  rd|z   }t	        |      dk  r|d|dd	 z   z  }c||z  }i |S )
Nr      i   rB      0z\ur   rS   )rC   rD   hexordr,   )r   ru   bufr=   sts        r!   rr   zDetector._unicode_encode   s     	BSUU8_$3r7*+AB/"gkrB "gkur!Aw&r		 
r#   N) __name__
__module____qualname____doc__r   rY   r^   r~   r]   rt   rK   recompiler7   r9   r"   r&   r)   r2   r4   r>   rH   rM   rJ   rO   rW   rU   rZ   rs   r\   rP   rr    r#   r!   r   r      s    < MKONNILRZZBCFbjjZ[G&"/+$!5
@6C&	

r#   r   )r   r   rC   	six.movesr   r   lang_detect_exceptionr   r   languager   utils.ngramr	   utils.unicode_blockr
   objectr   r   r#   r!   <module>r      s+     	 
 ! A   .lv lr#   