
    :Qg>                        d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZ dd	lm Z  dd
lm!Z! ddlm"Z" ddlm#Z# ddl$m%Z% ddl$m&Z&  ejN                  e(      Z) G d de      Z* G d d      Z+ G d de+      Z, G d de+      Z- G d de-      Z. G d de+      Z/ G d de/      Z0 G d de,      Z1 G d d e/      Z2 G d! d"e,      Z3 G d# d$e/      Z4 G d% d&      Z5 G d' d(e"e          Z6d)ee7   d*dfd+Z8e(d,k(  r e8ejr                         yy)-z Adobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on:

  https://github.com/adobe-type-tools/cmap-resources

    N)AnyBinaryIODictIterableIteratorListMutableMappingOptionalTextIOTupleUnioncastSet)PDFExceptionPDFTypeError   )name2unicode)KWD)PSEOFPSSyntaxError)	PSKeyword)	PSLiteral)PSStackParser)literal_name)choplist)nunpackc                       e Zd Zy)	CMapErrorN__name__
__module____qualname__     L/var/www/html/answerous/venv/lib/python3.12/site-packages/pdfminer/cmapdb.pyr   r   1   s    r$   r   c                       e Zd ZdZdeddfdZdefdZdededdfd	Z	d
ede
ddfdZde
d
eeee
f   ddfdZddZd
edee
   fdZy)CMapBaser   kwargsreturnNc                 .    |j                         | _        y N)copyattrsselfr(   s     r%   __init__zCMapBase.__init__9   s    28++-
r$   c                 @    | j                   j                  dd      dk7  S )NWModer   r-   getr/   s    r%   is_verticalzCMapBase.is_vertical<   s    zz~~gq)Q..r$   kvc                 "    || j                   |<   y r+   )r-   )r/   r7   r8   s      r%   set_attrzCMapBase.set_attr?   s    

1r$   codecidc                      y r+   r#   )r/   r;   r<   s      r%   add_code2cidzCMapBase.add_code2cidB       r$   c                      y r+   r#   )r/   r<   r;   s      r%   add_cid2unichrzCMapBase.add_cid2unichrE   r?   r$   c                      y r+   r#   )r/   cmaps     r%   use_cmapzCMapBase.use_cmapH   r?   r$   c                     t         r+   )NotImplementedError)r/   r;   s     r%   decodezCMapBase.decodeK   s    !!r$   )rC   r'   r)   N)r    r!   r"   debugobjectr0   boolr6   strr:   intr>   r   r   bytesrA   rD   r   rG   r#   r$   r%   r'   r'   5   s    E@ @D @/T /# & T  3 4 # U9eS3H-I d "5 "Xc] "r$   r'   c            	           e Zd Zdeeef   ddfdZdefdZdeddfdZ	de
dee   fd	Zej                  dd
fdedeeeef      deedf   ddfdZy)CMapr(   r)   Nc                 >    t        j                  | fi | i | _        y r+   )r'   r0   code2cidr.   s     r%   r0   zCMap.__init__P   s    $)&)+-r$   c                 >    d| j                   j                  d      z  S )Nz
<CMap: %s>CMapNamer3   r5   s    r%   __repr__zCMap.__repr__T   s    djjnnZ888r$   rC   c                     t        |t              sJ t        t        |                   dt        t
        t        f   dt        t
        t        f   dd ffd | j                  |j                         y )Ndstsrcr)   c                     |j                         D ]+  \  }}t        |t              ri }|| |<    ||       '|| |<   - y r+   )items
isinstancedict)rV   rW   r7   r8   dr,   s        r%   r,   zCMap.use_cmap.<locals>.copyZ   sF    ))+ Aa&+-ACFAJCFr$   )rZ   rO   rK   typer   rL   rI   rQ   )r/   rC   r,   s     @r%   rD   zCMap.use_cmapW   s`    $%6s4:6%	d3;' 	d3;.? 	D 	 	T]]DMM*r$   r;   c              #     K   t         j                  d| |       | j                  }t        |      D ]V  }||v rD||   }t	        |t
              r| | j                  }-t        t        t
        t        f   |      }K| j                  }X y w)Nzdecode: %r, %r)	logrH   rQ   iterrZ   rL   r   r   rI   )r/   r;   r\   ixs        r%   rG   zCMap.decodee   sy     		"D$/MMd 		"AAvaDa%GAT#v+.2AMM		"s   B	Br#   outrQ   .c           	         || j                   }d}t        |j                               D ]a  \  }}||fz   }t        |t              r|j                  d||fz         3| j                  |t        t        t        t        f   |      |       c y )Nr#   zcode %r = cid %d
)rc   rQ   r;   )
rQ   sortedrY   rZ   rL   writedumpr   r   rI   )r/   rc   rQ   r;   r7   r8   cs          r%   rg   z	CMap.dumps   s     }}HDX^^-. 	PFQtA!S!		.!Q78		cDc6k1BA,FQ	O	Pr$   )r    r!   r"   r   rK   rL   r0   rT   r'   rD   rM   r   rG   sysstdoutr   r
   r   rI   r   rg   r#   r$   r%   rO   rO   O   s    .sCx .T .9# 9+X +$ +"5 "Xc] "  jj04 "	PP 4V,-P CHo	P
 
Pr$   rO   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapr;   r)   .c                 V    t        |      dz  }|rt        j                  d|z  |      S y)N   z>%dHr#   lenstructunpackr/   r;   ns      r%   rG   zIdentityCMap.decode   s*    IN==!T22r$   Nr    r!   r"   rM   r   rL   rG   r#   r$   r%   rl   rl          5 U38_ r$   rl   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapByter;   r)   .c                 P    t        |      }|rt        j                  d|z  |      S y)Nz>%dBr#   ro   rs   s      r%   rG   zIdentityCMapByte.decode   s&    I==!T22r$   Nru   r#   r$   r%   rx   rx      rv   r$   rx   c                   j    e Zd Zdeeef   ddfdZdefdZdedefdZe	j                  fdeddfd	Zy)

UnicodeMapr(   r)   Nc                 >    t        j                  | fi | i | _        y r+   )r'   r0   
cid2unichrr.   s     r%   r0   zUnicodeMap.__init__   s    $)&)*,r$   c                 >    d| j                   j                  d      z  S )Nz<UnicodeMap: %s>rS   r3   r5   s    r%   rT   zUnicodeMap.__repr__   s    !DJJNN:$>>>r$   r<   c                 N    t         j                  d| |       | j                  |   S )Nget_unichr: %r, %r)r_   rH   r}   r/   r<   s     r%   
get_unichrzUnicodeMap.get_unichr   s"    		&c2s##r$   rc   c                     t        | j                  j                               D ]  \  }}|j                  d||fz          y )Nzcid %d = unicode %r
)re   r}   rY   rf   )r/   rc   r7   r8   s       r%   rg   zUnicodeMap.dump   s=    T__2245 	8FQII-A67	8r$   )r    r!   r"   r   rK   rL   r0   rT   r   ri   rj   r   rg   r#   r$   r%   r{   r{      sX    -sCx -T -?# ?$c $c $ "% 8 8 8r$   r{   c                       e Zd ZdedefdZy)IdentityUnicodeMapr<   r)   c                 F    t         j                  d| |       t        |      S )z+Interpret character id as unicode codepointr   )r_   rH   chrr   s     r%   r   zIdentityUnicodeMap.get_unichr   s    		&c23xr$   N)r    r!   r"   rL   rK   r   r#   r$   r%   r   r      s    c c r$   r   c                        e Zd ZdededdfdZy)FileCMapr;   r<   r)   Nc                 P   t        |t              rt        |t              s$J t        t        |      t        |      f             | j                  }|d d D ];  }t        |      }||v r!t        t        t        t        f   ||         }3i }|||<   |}= t        |d         }|||<   y )N)	rZ   rK   rL   r]   rQ   ordr   r   rI   )r/   r;   r<   r\   rh   cits          r%   r>   zFileCMap.add_code2cid   s    $$C)= 	
s$Zc#@
 	
= MMcr 	AQBQwc6k*AbE2')"	 b]"r$   )r    r!   r"   rK   rL   r>   r#   r$   r%   r   r      s     3 4 r$   r   c                   ,    e Zd Zdedeeeef   ddfdZy)FileUnicodeMapr<   r;   r)   Nc                    t        |t              sJ t        t        |                   t        |t              r2t        |j
                  t              sJ t        |j
                        }nJt        |t              r|j                  dd      }n't        |t              rt        |      }nt        |      |dk(  r| j                  j                  |      dk(  ry || j                  |<   y )NzUTF-16BEignore     )rZ   rL   rK   r]   r   namer   rM   rG   r   r   r}   r4   )r/   r<   r;   unichrs       r%   rA   zFileUnicodeMap.add_cid2unichr   s    #s#3Sc^3#dI&dii---!$)),Fe$[[X6Fc"YFt$$ X$//"5"5c":c"A%r$   )r    r!   r"   rL   r   r   rM   rA   r#   r$   r%   r   r      s(    &# &U9eS3H-I &d &r$   r   c                   ,     e Zd Zdededdf fdZ xZS )PyCMapr   moduler)   Nc                     t         |   |       |j                  | _        |j                  rd| j
                  d<   y y N)rS   r   r2   )superr0   CODE2CIDrQ   IS_VERTICALr-   )r/   r   r   	__class__s      r%   r0   zPyCMap.__init__   s:    $'"#DJJw r$   )r    r!   r"   rK   r   r0   __classcell__r   s   @r%   r   r      s"    $S $# $$ $ $r$   r   c                   0     e Zd Zdedededdf fdZ xZS )PyUnicodeMapr   r   verticalr)   Nc                     t         |   |       |r!|j                  | _        d| j                  d<   y |j
                  | _        y r   )r   r0   CID2UNICHR_Vr}   r-   CID2UNICHR_H)r/   r   r   r   r   s       r%   r0   zPyUnicodeMap.__init__   s>    $'$11DO"#DJJw$11DOr$   )r    r!   r"   rK   r   rJ   r0   r   r   s   @r%   r   r      s)    2S 2# 2 2$ 2 2r$   r   c                       e Zd ZU i Zeeef   ed<   i Zeee	e
   f   ed<    G d de      Zededefd       Zededefd       Zedded	edefd
       Zy)CMapDB_cmap_cache_umap_cachec                       e Zd Zy)CMapDB.CMapNotFoundNr   r#   r$   r%   CMapNotFoundr      s    r$   r   r   r)   c           	         |j                  dd      }d|z  }t        j                  d|       t        j                  j                  dd      t        j                  j                  t        j                  j                  t              d      f}|D ]  }t        j                  j                  ||      }t        j                  j                  |      sCt        j                  |      }	 t        t        |      dt        j                   |j#                                     |j%                          c S  t&        j)                  |      # |j%                          w xY w)	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/rC   r#   )replacer_   rH   osenvironr4   pathjoindirname__file__existsgzipopenr]   rK   pickleloadsreadcloser   r   )clsr   filename
cmap_paths	directoryr   gzfiles          r%   
_load_datazCMapDB._load_data   s    ||D"%!D(		-&JJNN;(>?GGLL2F;

 $ 		,I77<<	84Dww~~d#4#D	2v||FKKM/JKLLN		, %%d++ LLNs   *6E		Ec                 $   |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S 	 | j                  |   S # t        $ r Y nw xY w| j	                  |      }t        ||      x| j                  |<   }|S )Nz
Identity-Hr   )r2   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rl   rx   r   KeyErrorr   r   )r   r   datarC   s       r%   get_cmapzCMapDB.get_cmap  s    <a((\!a((''#!,,''#!,,	??4(( 		~~d#'-dD'99s   A 	A! A!r   c                     	 | j                   |   |   S # t        $ r Y nw xY w| j                  d|z        }dD cg c]  }t        |||       nc c}w c}| j                   |<   | j                   |   |   S )Nzto-unicode-%s)FT)r   r   r   r   )r   r   r   r   r8   s        r%   get_unicode_mapzCMapDB.get_unicode_map  sy    	??4(22 		~~o45FS TdD!!< T Tt$X..s    	  AN)F)r    r!   r"   r   r   rK   r   __annotations__r   r   r   r   r   classmethodr   r   r'   r   rJ   r{   r   r#   r$   r%   r   r      s    %'Kc6k"'13Kc4--.3y  ,c ,c , ,& C H  " /3 /$ /: / /r$   r   c                   L   e Zd ZdededdfdZddZ ed      Z ed      Z	 ed	      Z
 ed
      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      ZdededdfdZdeddfdZy)
CMapParserrC   fpr)   Nc                 j    t        j                  | |       || _        d| _        t	               | _        y )NT)r   r0   rC   _in_cmapset	_warnings)r/   rC   r   s      r%   r0   zCMapParser.__init__!  s,    tR(	#&5r$   c                 D    	 | j                          y # t        $ r Y y w xY wr+   )
nextobjectr   r5   s    r%   runzCMapParser.run)  s-    	OO 	  		s    	s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 4   || j                   u rd| _        | j                          y|| j                  u rd| _        y| j                  sy|| j                  u rA	 | j                  d      \  \  }}\  }}| j                  j                  t        |      |       y|| j                  u rO	 | j                  d      \  \  }}| j                  j                  t        j                  t        |                   y|| j                  u r| j                          y|| j                   u r| j                          y|| j"                  u r| j                          y|| j$                  u rj| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]7  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt)        |t.              s| j-                  d	       nt1        |
      t1        |      k7  r| j-                  d
       |
dd }|dd }||k7  r| j-                  d       |
dd }|dd }t3        |      }t3        |      }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C : y|| j<                  u r| j                          y|| j>                  u ru| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]C  \  }}t)        |t*              st)        |t.              s(| j                  j;                  ||       E y|| j@                  u r| j                          y|| jB                  u r| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt1        |
      t1        |      k7  r| j-                  d       ut3        |
      }t3        |      }t)        |tD              rct1        |      ||z
  dz   k7  r| j-                  d       tG        t5        ||dz         |      D ]!  \  }}| j                  j;                  ||       # t)        |t*              sJ |dd }t3        |      }|dd }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C  y|| jH                  u r| j                          y|| jJ                  u r~| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]L  \  }}t)        |t*              st)        |t*              s(| j                  j;                  t3        |      |       N y|| jL                  u r| j                          y|| jN                  u r| j                          y| jQ                  ||f       y# t        $ r Y yw xY w# t        $ r Y yt        j                  $ r Y yw xY wc c}}w c c}}w c c}}w c c}}w )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrn   r      z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>LzThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpoprC   r:   r   r   KEYWORD_USECMAPrD   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rZ   rM   
_warn_oncerL   rp   r   rangerq   packrA   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGElistzipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r/   r   r   _r7   r8   cmapname__objobjs
start_byteend_byter<   start_prefix
end_prefixsvarevarstartendvlenra   rb   r;   unicode_valuevarbaseprefixs                              r%   
do_keywordzCMapParser.do_keywordA  s0   
 D*** DMKKMd***!DM}}D$$$#'88A; !Q!Q		""<?A6 D(((#'88A; !X		""6??<3I#JK
 D444KKMD222KKMD...KKMD,,,)-7IRC7D7/74/@ 9+Xs!*e4OO$VW!(E2OO$TU!#s+OO$TUz?c(m3OO- )#2%cr]
:-OO: !"#}dm4ysU{Q/ 9A$v{{4'CTEF'KKAII,,S1Wa89;9@ D---KKMD+++)-7IRC7D7'40 8ddE*z#s/CII,,S$78 D---KKMD+++)-7IRC7D708D0A ?,Xt!*e4OO$EF!(E2OO$CDz?c(m3OO$TU
+h'dD)4yC%K!O3F /2%sQw2G.N E*]		00mDE &dE222rs)C"3<D!#2YFs8D"3;?3 ?"V[[tax%@$%HH		00A>?5?: D,,,KKMD***)-7IRC7D7'40 Adc5)ju.EII,,WS\4@A D111KKMD///KKM		3,A !  !   &&   8P 8 8J 8sC   ?Y (AY 5ZZ/ZZ	YY	Y?*Y?>Y?msgc                     || j                   vr6| j                   j                  |       d}t        j                  ||z          yy)z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addr_   warning)r/   r  base_msgs      r%   r   zCMapParser._warn_once  sA    dnn$NNs#/ 
 KK3' %r$   )r)   N)r    r!   r"   r'   r   r0   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rL   r   r
  rK   r   r#   r$   r%   r   r      s   X 8   L)*oO*oOf+K"%&<"= #$8 9 01n-/]+/]+n-L)"#67 !23U c U ) U  U n	(c 	(d 	(r$   r   argvr)   c                     ddl m}  |dt               | dd  }|D ]R  }t        |d      }t	               }t        ||      j                          |j                          |j                          T y )Nr   )warnzThe function main() from cmapdb.py will be removed in 2023. It was probably introduced for testing purposes a long time ago, and no longer relevant. Feel free to create a GitHub issue if you disagree.r   rb)	warningsr  DeprecationWarningr   r   r   r   r   rg   )r  r  argsfnamer   rC   s         r%   mainr    sq    	> 		 8D %4  "

		 r$   __main__):__doc__r   loggingr   os.pathr   rq   ri   typingr   r   r   r   r   r   r	   r
   r   r   r   r   r   pdfminer.pdfexceptionsr   r   
encodingdbr   psparserr   pdfminer.psexceptionsr   r   r   r   r   r   utilsr   r   	getLoggerr    r_   r   r'   rO   rl   rx   r{   r   r   r   r   r   r   r   rK   r  r  r#   r$   r%   <module>r$     s<  	   	    
     > $  6   # "  g!	 	" "42P8 2Pj8 | 8 8" t $&Z &*$T $2: 26/ 6/rA(y) A(HtCy T ( zN r$   