
    #h)                     d   d Z ddlZddlZddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ dd	lmZ  ej*                  e      Zd
e
de
fdZ	 	 	 	 ddee   dededee   dedeeeef      fdZdedeee      fdZd
e
ddfdZd
e
ddfdZd
e
ddfdZddZ edk(  r e         yy)z,
Implements a basic command-line interface.
    N)ProcessPoolExecutoras_completed)islice)AnyIteratorListOptionalTuple   )	check_url)_make_sample)UrlStoreargsreturnc                    t        j                  d      }|j                  dd      }|j                  dddt        d	       |j                  d
ddt        d	       |j                  dddt               |j                  dddd       |j                  dddt
               |j                  dd      }|j                  ddd       |j                  dddt               |j                  d d!d"d       |j                  d#d$      }|j                  d%d&t
               |j                  d'd(t
               |j                  d)d*t
               |j                         S )+z(Define parser for command-line argumentsz"Command-line interface for Courlan)descriptionzI/OzManage input and outputz-iz--inputfilezname of input file (required)T)helptyperequiredz-oz--outputfilezname of output file (required)z-dz--discardedfilez/name of file to store discarded URLs (optional))r   r   z-vz	--verbosezincrease output verbosity
store_true)r   actionz-pz
--parallelz4number of parallel processes (not used for sampling)	FilteringzConfigure URL filtersz--strictzperform more restrictive testsz-lz
--languagez$use language filter (ISO 639-1 code)z-rz--redirectszcheck redirectsSamplingz+Use sampling by host, configure sample sizez--samplezsize of sample per domainz--exclude-maxz%exclude domains with more than n URLsz--exclude-minz%exclude domains with less than n URLs)argparseArgumentParseradd_argument_groupadd_argumentstrint
parse_args)r   
argsparsergroup1group2group3s        H/var/www/html/sandstorm/venv/lib/python3.12/site-packages/courlan/cli.pyr    r       s   ((8J **52KLF
,   -   >	   k ;L   C	   **;8OPF
9,   l!Gc   m"3L   **AF 
)D3O
EC   EC     ""    urlsstrictwith_redirectslanguagewith_navc                     g }| D ]>  }t        |||||      }||j                  d|d   f       ,|j                  d|f       @ |S )z6Internal function to be used with CLI multiprocessing.)r(   r)   r*   r+   Tr   F)r   append)r'   r(   r)   r*   r+   resultsurlresults           r%   _cli_check_urlsr1   O   se     G ))
 NND&),-NNE3<() Nr&   	inputfilec              #      K   t        | ddd      5 }	 t        |d      D cg c]  }|j                          }}|s
	 ddd       y| 8c c}w # 1 sw Y   yxY ww)zRead input line in batchesrutf-8ignoreencodingerrorsi N)openr   strip)r2   inputfhlinebatchs       r%   _batch_linesr?   g   sg     	iwx	@ G.4We.DEdTZZ\EEE	 
 K	 E s+   AAAA	A
AAAc                 .   | j                   r$t        j                  t        j                         n#t        j                  t        j
                         t        dd| j                  | j                         }t        | j                        D ]  }|j                  |        t        | j                  dd      5 }t        || j                  | j                  | j                         D ]  }|j#                  |dz           	 ddd       y# 1 sw Y   yxY w)	zSample URLs on the CLI.TN)
compressedr*   r(   verbosewr5   r8   )exclude_minexclude_max
)rB   LOGGERsetLevelloggingDEBUGERRORr   r(   r?   r2   add_urlsr:   
outputfiler   samplerE   rF   write)r   urlstorer>   outputfhr/   s        r%   _cli_samplerS   q   s    ||&&$t{{DLLH dnn- !% ! 
doosW	5 'KK((((	
 	'C NN3:&	'' ' 's   :ADDc                    
 t         j                        5 
t         j                  dd      5 }t         j                  ddd      5 }	 g }t        |      dk  r8t        t        |d            }|sn |j                  |       t        |      dk  r8|sn 
fd	|D        }t        |      D ]p  }|j                         D ][  \  }}|r|j                  |d
z           j                  *t         j                  dd      5 }	|	j                  |       ddd       ] r 	 ddd       ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   &xY w# 1 sw Y   *xY w# 1 sw Y   yxY w)z7Read input file bit by bit and process URLs in batches.)max_workersrC   r5   rD   r4   r6   r7   i  c              3      K   | ]<  }j                  t        |j                  j                  j                          > yw))r(   r)   r*   N)submitr1   r(   	redirectsr*   ).0r>   r   executors     r%   	<genexpr>z_cli_process.<locals>.<genexpr>   sD      	  #;;#'>>!]]   	s   AArG   Na)r   parallelr:   rN   r2   lenlistr   r-   r   r0   rP   discardedfile)r   rR   r<   batches
line_batchfuturesfuturevalidr/   	discardfhrZ   s   `         @r%   _cli_processrg      sz   		7 $18TwF $1	4gh$1 
Gg,%!&$"78
!z*	 g,% 	 %	G 'w/ 	1"(--/ 1JE3 sTz2++7! ..g 1&%OOC01 11	1-  $1 $1 $1 $1B1 1C$1 $1 $1 $1 $1 $1s`   E"EA	E
AE
-E
D>E
%E-E">EE

EEE	E""E+c                 J    | j                   rt        |        yt        |        y)z+Start processing according to the argumentsN)rO   rS   rg   r   s    r%   process_argsrj      s    {{DTr&   c                  R    t        t        j                  dd       } t        |        y)zRun as a command-line utility.r   N)r    sysargvrj   ri   s    r%   mainrn      s    chhqrl#Dr&   __main__)FFNF)r   N)!__doc__r   rJ   rl   concurrent.futuresr   r   	itertoolsr   typingr   r   r   r	   r
   corer   samplingr   rQ   r   	getLogger__name__rH   r    r   boolr1   r?   rS   rg   rj   rn    r&   r%   <module>rz      s"     
 @  7 7  "  
		8	$7#S 7#S 7#x  "
s)  sm	
  
%c	
0C HT#Y$7 'c 'd '.&1s &1t &1Rs t  zF r&   