
    :Qg                        d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" ddlm#Z# ddlm$Z$ ddlm%Z% ddlm&Z& ddlm'Z' ddlm(Z( ddl)m*Z* ddl+m,Z, ddl+m-Z- ddl.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6 ddlm7Z7m8Z8m9Z9m:Z:m;Z;m<Z< dd lm=Z= dd!lm>Z> dd"lm?Z? dd#lm@Z@  ej                  eB      ZC G d$ d%e*      ZD G d& d'eD      ZE ed(e
ee7      ZF G d) d*eDeeF         ZG G d+ d,eGe7         ZH G d- d.eGe7         ZI G d/ d0eGe7         ZJ G d1 d2eGe7         ZKy)3    N)BinaryIODictGenericListOptionalSequenceTextIOTupleTypeVarUnioncast)PDFColorSpace   )utils)ImageWriter)LAParamsLTComponentTextGroupElement)LTAnno)LTChar)LTContainer)LTCurve)LTFigure)LTImageLTItem)LTLayoutContainer)LTLine)LTPage)LTRect)LTText)	LTTextBox)LTTextBoxVertical)LTTextGroup)
LTTextLine)PDFTextDevice)PDFFont)PDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)PDFValueError)AnyIOPointMatrixRectPathSegmentmake_compat_str)apply_matrix_pt)bbox2str)enc)mult_matrixc                      e Zd ZU eed<   eed<   	 	 d%dededee	   ddfdZ
d	ededdfd
Zd	eddfdZdedededdfdZdeddfdZdededdfdZdededededee   ddfdZdededededededed edefd!Zdededefd"Zd#eddfd$Zy)&PDFLayoutAnalyzercur_itemctmNrsrcmgrpagenolaparamsreturnc                 Z    t        j                  | |       || _        || _        g | _        y N)r&   __init__r=   r>   _stackselfr<   r=   r>   s       O/var/www/html/answerous/venv/lib/python3.12/site-packages/pdfminer/converter.pyrB   zPDFLayoutAnalyzer.__init__:   s)     	tW- /1    pagec                     |j                   \  }}}}t        |||f      \  }}t        |||f      \  }}ddt        ||z
        t        ||z
        f}t        | j                  |      | _        y )Nr   )mediaboxr4   absr   r=   r:   )rE   rH   r;   x0y0x1y1rJ   s           rF   
begin_pagezPDFLayoutAnalyzer.begin_pageE   sl    ==RR"3R1R"3R1Rq#b2g,BG5t{{H5rG   c                    | j                   r#J t        t        | j                                      t        | j                  t
              s#J t        t        | j                                     | j                  %| j                  j                  | j                         | xj                  dz  c_	        | j                  | j                         y )Nr   )rC   strlen
isinstancer:   r   typer>   analyzer=   receive_layout)rE   rH   s     rF   end_pagezPDFLayoutAnalyzer.end_pageL   s    ;;5C$4 55$--0J#d4==6I2JJ0==$MM!!$--0qDMM*rG   namebboxmatrixc                     | j                   j                  | j                         t        ||t	        || j
                              | _        y rA   )rC   appendr:   r   r7   r;   )rE   rY   rZ   r[   s       rF   begin_figurezPDFLayoutAnalyzer.begin_figureT   s3    4==) t[-JKrG   _c                 
   | j                   }t        | j                   t              s#J t        t	        | j                                      | j
                  j                         | _         | j                   j                  |       y rA   )r:   rT   r   rR   rU   rC   popadd)rE   r_   figs      rF   
end_figurezPDFLayoutAnalyzer.end_figureX   sV    mm$--2LCT]]8K4LL2)#rG   streamc                 v   t        | j                  t              s#J t        t	        | j                                     t        ||| j                  j                  | j                  j                  | j                  j                  | j                  j                  f      }| j                  j                  |       y rA   )rT   r:   r   rR   rU   r   rL   rM   rN   rO   rb   )rE   rY   re   items       rF   render_imagezPDFLayoutAnalyzer.render_image^   s    $--2LCT]]8K4LL2]]t}}//1A1A4==CSCST

 	$rG   gstatestrokefillevenoddpathc                    dj                  d |D              }|dd dk7  ry|j                  d      dkD  rTt        j                  d|      D ]:  }||j	                  d      |j                  d       }| j                  |||||       < y|D 	cg c]%  }	t        t        |	d   dk7  r|	d	d n|d   d	d       ' }
}	|
D cg c]  }t        | j                  |       }}|D cg c]  }t        |d          }}|D cg c]U  }t        |ddd
   |d
dd
         D cg c]/  \  }}t        | j                  t        |      t        |      f      1 c}}W }}}}t        ||      D 	cg c]  \  }}	t        t        |g|	       }}}	|dv r_t        |j                   |d   |d   ||||j"                  |j$                  ||j&                  
      }| j(                  j+                  |       y|dv r|\  \  }}\  }}\  }}\  }}}|d   |d   k(  }||k(  xr ||k(  xr ||k(  xr ||k(  xs ||k(  xr ||k(  xr ||k(  xr ||k(  }|rd|rbt-        |j                   g |d   |d
   ||||j"                  |j$                  ||j&                  	      } | j(                  j+                  |        yt/        |j                   |||||j"                  |j$                  ||j&                  	      }!| j(                  j+                  |!       yt/        |j                   |||||j"                  |j$                  ||j&                  	      }!| j(                  j+                  |!       yc c}	w c c}w c c}w c c}}w c c}}}w c c}	}w )z@Paint paths described in section 4.4 of the PDF reference manual c              3   &   K   | ]	  }|d      yw)r   N ).0xs     rF   	<genexpr>z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>p   s     +!+s   Nr   mzm[^m]+r   h   >   mlmlh)original_pathdashing_style>   mlllhmllll   )joincountrefinditerstartend
paint_pathr   r/   r4   r;   rR   zipfloatr2   r   	linewidthscolorncolordashr:   rb   r    r   )"rE   ri   rj   rk   rl   rm   shaperu   subpathpraw_ptsptpts	operation	operatorsoperand1operand2transformed_pointsotransformed_pathlinerL   rM   rN   rO   x2y2x3y3r_   is_closed_loophas_square_coordinatesrectcurves"                                     rF   r   zPDFLayoutAnalyzer.paint_pathg   s    +d++!9 [[![[E2 HqwwqzAEE!H5gwGH OSIJUadckAbcFtAwrs|DG  <CCR?488R0CCC<@AyYq\*AIA "&" "
  /2)ADqD/9QTPQT?.S*( $DHHuXh.PQ" "  	+=> Aq [1'q'*   
 %
 $$FFMMMM"2"(++ !!$',,<?9R(2rHRhr2!$Q3q6!1"HCrCbBhC28*GBhE28EbER2X ' "&<!((*#a&*3q6*(
D MM%%d+#(((
E MM%%e,$$MMMM$KK
 !!%(e DA" s0   *M>M"!M'> M24M,M2+M9,M2fontfontsizescalingrisecidncsgraphicstatec	                 v   	 |j                  |      }	t        |	t              sJ t        t        |	                   	 |j                  |      }
|j                  |      }t        ||||||	|
|||
      }| j                  j                  |       |j                  S # t        $ r | j                  ||      }	Y zw xY wrA   )	to_unichrrT   rR   rU   r(   handle_undefined_char
char_width	char_dispr   r:   rb   adv)rE   r[   r   r   r   r   r   r   r   text	textwidthtextdisprg   s                rF   render_charzPDFLayoutAnalyzer.render_char   s    	9>>#&DdC(9#d4j/9( OOC(	>>#&
 	$xx# $ 	9--dC8D	9s   :B B87B8c                 :    t         j                  d||       d|z  S )Nzundefined: %r, %rz(cid:%d))logdebug)rE   r   r   s      rF   r   z'PDFLayoutAnalyzer.handle_undefined_char   s    		%tS1CrG   ltpagec                      y rA   rq   rE   r   s     rF   rW   z PDFLayoutAnalyzer.receive_layout  s    rG   r   N) __name__
__module____qualname__r   __annotations__r0   r*   intr   r   rB   r+   rP   rX   rR   r1   r^   rd   r,   rh   r)   boolr   r2   r   r'   r   r   r   r   r   rW   rq   rG   rF   r9   r9   6   s   	K
 '+		2#	2 	2 8$		2
 
	26w 6V 6 6+W + +L LD L& LT LC D    i  D  t)t) t) 	t)
 t) {#t) 
t)l  	
     & 
B '      V  rG   r9   c            	       L    e Zd Z	 	 d
dededee   ddfdZdeddfdZ	defd	Z
y)PDFPageAggregatorNr<   r=   r>   r?   c                 D    t         j                  | |||       d | _        y N)r=   r>   )r9   rB   resultrD   s       rF   rB   zPDFPageAggregator.__init__  s"     	""4("S(,rG   r   c                     || _         y rA   r   r   s     rF   rW   z PDFPageAggregator.receive_layout  s	    rG   c                 6    | j                   J | j                   S rA   r   rE   s    rF   
get_resultzPDFPageAggregator.get_result  s    {{&&&{{rG   r   )r   r   r   r*   r   r   r   rB   r   rW   r   rq   rG   rF   r   r     sY     '+	-#- - 8$	-
 
-V  F rG   r   IOTypec                   T    e Zd Z	 	 	 d
dededededee   ddfdZ	e
dedefd	       Zy)PDFConverterNr<   outfpcodecr=   r>   r?   c                     t         j                  | |||       || _        || _        | j	                  | j                        | _        y r   )r9   rB   r   r   _is_binary_streamoutfp_binary)rE   r<   r   r   r=   r>   s         rF   rB   zPDFConverter.__init__  s@     	""4("S"

 224::>rG   c                     dt        | dd      v ryt        | d      ryt        | t        j                        ryt        | t        j
                        ryt        | t        j                        ryy)z"Test if an stream is binary or notbmodero   TF)getattrhasattrrT   ioBytesIOStringIO
TextIOBase)r   s    rF   r   zPDFConverter._is_binary_stream*  s[     '%,,UF#rzz*r{{+r}}-rG   )utf-8r   N)r   r   r   r*   r   rR   r   r   r   rB   staticmethodr.   r   r   rq   rG   rF   r   r     ss    
 '+?#? ? 	?
 ? 8$? 
?  4  rG   r   c                        e Zd Z	 	 	 	 	 ddededededee   de	dee
   d	df fd
Zded	dfdZded	dfdZdeded	dfdZdede	de	de	dee   d	dfdZ xZS )TextConverterNr<   r   r   r=   r>   
showpagenoimagewriterr?   c                 J    t         |   |||||       || _        || _        y )Nr   r=   r>   )superrB   r   r   )	rE   r<   r   r   r=   r>   r   r   	__class__s	           rF   rB   zTextConverter.__init__=  s,     	%uVhW$&rG   r   c                     t        j                  || j                  d      }| j                  r8t	        t
        | j                        j                  |j                                y t	        t        | j                        j                  |       y )Nignore)
r   compatible_encode_methodr   r   r   r   r   writeencoder	   rE   r   s     rF   
write_textzTextConverter.write_textK  s[    --dDJJI4::&,,T[[];$**40rG   r   c                      dt         dd f fd j                  r j                  d|j                  z          |        j                  d       y )Nrg   r?   c                 Z   t        | t              r| D ]
  } |        n/t        | t              rj                  | j	                                t        | t
              rj                  d       y t        | t              r)j                  j                  j                  |        y y y )N
)	rT   r   r!   r   get_textr"   r   r   export_image)rg   childrenderrE   s     rF   r   z,TextConverter.receive_layout.<locals>.renderS  s    $,! "E5M"D&)0$	*%D'*##/$$11$7 0 +rG   zPage %s
)r   r   r   pageidrE   r   r   s   ` @rF   rW   zTextConverter.receive_layoutR  sG    
	8 
	8D 
	8 ??OOK&--78vrG   rY   re   c                 L    | j                   y t        j                  | ||       y rA   )r   r   rh   )rE   rY   re   s      rF   rh   zTextConverter.render_imageg  s&    #!!$f5rG   ri   rj   rk   rl   rm   c                      y rA   rq   )rE   ri   rj   rk   rl   rm   s         rF   r   zTextConverter.paint_pathm  s     	rG   )r   r   NFN)r   r   r   r*   r.   rR   r   r   r   r   r   rB   r   r   rW   r,   rh   r)   r   r2   r   __classcell__)r   s   @rF   r   r   <  s    
 '+ -1'#' ' 	'
 ' 8$' ' k*' 
'1s 1t 1V  * i D   	
  {# 
rG   r   c                       e Zd ZdddddddZddd	Z	 	 	 	 	 	 	 	 	 	 	 	 d6dedededede	e
   dededededede	e   dede	eeef      de	eeef      dd
fdZdedd
fdZd7dZd7dZdedd
fdZd ed!ed"ed#ed$ed%edd
fd&Zd ed!ed'edd
fd(Zd'ed!ed"ed#ed$ed%edd
fd)Zd eded"ed#ed*edd
fd+Z	 d8d ed!ed"ed#ed$ed%ed,edd
fd-Zd edd
fd.Zded/ed0edd
fd1Zd7d2Zd3edd
fd4Zd7d5Z y
)9HTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rH   blue)r  charNr<   r   r   r=   r>   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr?   c                 F   t         j                  | |||||       | j                  r| j                  st	        d      | j                  s| j                  rt	        d      |ddi}|ddd}|| _        || _        || _        |	| _        |
| _	        || _
        || _        || _        |rJ| j                  j                  | j                         | j                  j                  | j                         | j                  | _        d | _        g | _        | j'                          y )Nr   )Codec is required for a binary I/O outputz1Codec must not be specified for a text I/O outputr  r   r  )r   rH   )r   rB   r   r   r-   r  r	  r
  r   r  r   r  r  updateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)rE   r<   r   r   r=   r>   r  r	  r
  r   r  r   r   r  r  s                  rF   rB   zHTMLConverter.__init__  s   " 	'5fx 	 	

 TZZ KLL  TZZ STT!7+K$+V<K
"$$$&&&##D$4$45##D$4$45#26
=?rG   r   c                     | j                   rCt        t        | j                        j	                  |j                  | j                                y t        t        | j                        j	                  |       y rA   r   r   r   r   r   r   r	   r   s     rF   r   zHTMLConverter.write  R    ::4::&,,T[[-DE 	 $**40rG   c                     | j                  d       | j                  rd| j                  z  }nd}| j                  |       | j                  d       y )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rE   ss     rF   r  zHTMLConverter.write_header  sO    

#$::!#'::. 
 IA

1

$%rG   c                     t        d| j                        D cg c]  }d| d| d }}ddj                  |      z  }| j                  |       | j                  d       y c c}w )Nr   z
<a href="#z">z</a>z8<div style="position:absolute; top:0px;">Page: %s</div>
z, z</body></html>
)ranger=   r   r   )rE   i
page_linksr  s       rF   write_footerzHTMLConverter.write_footer  sk    9>q$++9NOA
1#Rs$/O
OG$))K
 
 	

1

%& Ps   A"c                 8    | j                  t        |             y rA   )r   r6   r   s     rF   r   zHTMLConverter.write_text  s    

3t9rG   colorborderwidthrs   ywrv   c                     | j                   j                  |      }|]d|||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j	                  |       y )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r  getr  r  r   )	rE   r"  r#  rs   r$  r%  rv   color2r  s	            rF   
place_rectzHTMLConverter.place_rect  s     !!%%e,K 

N]]Q&$**4

N

N	  JJqMrG   rg   c                     | j                  |||j                  |j                  |j                  |j                         y rA   )r)  rL   rO   widthheight)rE   r"  r#  rg   s       rF   place_borderzHTMLConverter.place_border  s+    {DGGTWWdjj$++VrG   c                    | j                   | j                   j                  |      }dt        |      ||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j                  |       y )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r6   r  r  r   )	rE   rg   r#  rs   r$  r%  rv   rY   r  s	            rF   place_imagezHTMLConverter.place_image  s     '##006DD I

N]]Q&$**4

N

N	  JJqMrG   sizec                 8   | j                   j                  |      }|}d||| j                  z  | j                  |z
  | j                  z  || j                  z  | j                  z  fz  }| j                  |       | j                  |       | j                  d       y )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;"></span>
)r  r'  r  r  r	  r   r   )rE   r"  r   rs   r$  r0  r(  r  s           rF   
place_textzHTMLConverter.place_text  s     !!%%e,. 

N]]Q&$**44::%6	  JJqMOOD!JJ{#rG   writing_modec           	         | j                   j                  | j                         d | _        d||||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j                  |       y )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r  r]   r  r  r  r   )	rE   r"  r#  rs   r$  r%  rv   r4  r  s	            rF   	begin_divzHTMLConverter.begin_div  s     	tzz*
 DJJ"djj0DJJDJJ 	
 	

1rG   c                     | j                   | j                  d       | j                  j                         | _         | j                  d       y )N</span>z</div>)r  r   r  ra   )rE   r"  s     rF   end_divzHTMLConverter.end_div6  s;    ::!JJy!__((*


8rG   fontnamer   c                    ||f}|| j                   k7  rh| j                   | j                  d       |j                  d      d   }| j                  d||| j                  z  | j                  z  fz         || _         | j                  |       y )Nr8  +z.<span style="font-family: %s; font-size:%dpx">)r  r   splitr  r	  r   )rE   r   r:  r   r   fontname_without_subset_tags         rF   put_textzHTMLConverter.put_text=  s    (#4::zz%

9%*2..*=b*A'JJ@.4::0E0VWX DJrG   c                 &    | j                  d       y )Nz<br>r   r   s    rF   put_newlinezHTMLConverter.put_newlineL  s    

6rG   r   c                      dt         t        t        f   dd f fddt        dd f fd |        xj                   j
                  z  c_        y )Nrg   r?   c                 j    t        | t              r"j                  dd|        | D ]
  } |        y )Nr  r   )rT   r$   r-  rg   r   rE   
show_groups     rF   rG  z0HTMLConverter.receive_layout.<locals>.show_groupQ  s9    $,!!+q$7! &Eu%&rG   c           
      X   t        | t              rxj                  | j                  z  c_        j	                  dd|        j
                  rmj                  dj                  | j                  z
  j                  z  z         j                  dj                  | j                  | j                               | D ]
  } |        | j                  | j                  D ]
  } |        y t        | t              rj	                  dd|        y t        | t              r_j                  dd| j                  | j                  | j                  | j                          | D ]
  } |        j#                  d       y t        | t$              r?j'                  | d| j                  | j                  | j                  | j                          y j(                  dk(  r
t        | t*              r#j	                  dd|        | D ]
  } |        y t        | t,              rbj	                  d	d|        j/                  d	t1        | j2                  dz         | j                  | j                  d
       | D ]
  } |        y t        | t4              rTj	                  dd|        j/                  d| j7                         | j                  | j                  | j8                         y t        | t*              r/| D ]
  } |        j(                  dk7  rj;                          y t        | t,              rnj                  d	d| j                  | j                  | j                  | j                   | j=                                | D ]
  } |        j#                  d	       y t        | t4              rAt?        | j@                        }jC                  | j7                         || j8                         y t        | tD              rjG                  | j7                                y )NrH   r   z*<div style="position:absolute; top:%dpx;">z<a name="{}">Page {}</a></div>
r   r  exactr  r     r  loose)$rT   r   r  rO   r-  r   r   r  formatr   groupsr   r   r6  rL   r+  r,  r9  r   r/  r
  r%   r"   r3  rR   indexr   r   r0  rC  get_writing_moder3   r:  r@  r!   r   )rg   r   groupr:  r   rE   rG  s       rF   r   z,HTMLConverter.receive_layout.<locals>.renderX  s   $'(!!&!T2??JJD MMDGG3tzzAC JJ:AA KK
 " "E5M";;*!% *"5)*j g D'*!!'1d3d c D(+xDGGTWWdjj$++V! "E5M"X&Z Y D'*  q$''477DJJTV S ??g-!$
3))*a>%) *E"5M*L I $D)4)))Q=%s4::>':DGGTWWb &* *E"5M*> ; $D&1))&!T:"DMMOTWWdggtyy6 / "$
3%) *E"5M*??g5 ,,.& % $D)4% GG GG JJ KK 113 &* *E"5M*Y/  $D&1#24==#AdmmoxK  $D&18rG   )r   r$   r   r   r  r  rE   r   r   rG  s   ` @@rF   rW   zHTMLConverter.receive_layoutP  sV    	U;0@#@A 	d 	G	 G	D G	R 	v(rG   c                 $    | j                          y rA   r   r   s    rF   closezHTMLConverter.close      rG   )r   r   Nr   g      ?normalT2   Nr   NNr?   N)False)!r   r   r   r  r  r*   r.   rR   r   r   r   r   r   r   r   rB   r   r  r   r   r)  r   r-  r   r/  r3  r6  r9  r@  rC  r   rW   rT  rq   rG   rF   r   r   x  s   K K '+"-10404/#/ / 	/
 / 8$/ / / / / / k*/ / d38n-/ d38n-/  
!/b# $ s t '*/49>CHMR	(# C { t *-27<AFKPU	( #(-27?D	8 $  	
     
:S T S C 5 T SV S SjrG   r   c                       e Zd Z ej                  d      Z	 	 	 	 	 ddededede	de
e   de
e   d	ed
dfdZded
dfdZddZddZded
dfdZded
dfdZddZy)XMLConverterz[ ---]Nr<   r   r   r=   r>   r   stripcontrolr?   c                     t         j                  | |||||       | j                  | j                   k(  rt	        d      || _        || _        | j                          y )Nr   r  )r   rB   r   r   r-   r   r\  r  )rE   r<   r   r   r=   r>   r   r\  s           rF   rB   zXMLConverter.__init__  se     	'5fx 	 	

 TZZ0 KLL&(rG   r   c                     | j                   rCt        t        | j                        j	                  |j                  | j                                y t        t        | j                        j	                  |       y rA   r  r   s     rF   r   zXMLConverter.write  r  rG   c                     | j                   r| j                  d| j                   z         n| j                  d       | j                  d       y )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   r   s    rF   r  zXMLConverter.write_header  s;    ::JJ?$**LMJJ12

;rG   c                 &    | j                  d       y )Nz	</pages>
rB  r   s    rF   r   zXMLConverter.write_footer  s    

< rG   c                     | j                   r| j                  j                  d|      }| j                  t	        |             y Nro   )r\  CONTROLsubr   r6   r   s     rF   r   zXMLConverter.write_text  s4    <<##B-D

3t9rG   r   c                 X     dt         dd f fddt         dd f fd |       y )Nrg   r?   c                 <   t        | t              r4j                  d| j                  t	        | j
                        fz         y t        | t              rGj                  dt	        | j
                        z         | D ]
  } |        j                  d       y )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)rT   r"   r   rN  r5   rZ   r$   rF  s     rF   rG  z/XMLConverter.receive_layout.<locals>.show_group  s    $	*

5zz8DII#678  D+.

4x		7JJK! &Eu%&

+,rG   c                    t        | t              rd| j                  t        | j                        | j
                  fz  }j                  |       | D ]
  } |        | j                  ;j                  d       | j                  D ]
  } |        j                  d       j                  d       y t        | t              r6d| j                  t        | j                        fz  }j                  |       y t        | t              r6d| j                  t        | j                        fz  }j                  |       y t        | t              rEd| j                  t        | j                        | j                         fz  }j                  |       y t        | t              radj                  | j                  t        | j                              }j                  |       | D ]
  } |        j                  d	       y t        | t               rHj                  d
t        | j                        z         | D ]
  } |        j                  d       y t        | t"              rkd}t        | t$              rd}d| j&                  t        | j                        |fz  }j                  |       | D ]
  } |        j                  d       y t        | t(              rdt+        | j,                        t        | j                        | j.                  j                  | j0                  j2                  | j4                  fz  }j                  |       j7                  | j9                                j                  d       y t        | t:              r#j                  d| j9                         z         y t        | t<              rj>                  Pj>                  jA                  |       }j                  dt+        |      | jB                  | jD                  fz         y j                  d| jB                  | jD                  fz         y J tG        d| f             )Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="{}" bbox="{}">
z
</figure>
z<textline bbox="%s">
z</textline>
ro   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
	Unhandled)$rT   r   r   r5   rZ   rotater   rM  r   r   r    r   get_ptsr   rL  rY   r%   r"   r#   rN  r   r6   r:  r   r   r   r0  r   r   r!   r   r   r   r+  r,  rR   )	rg   r  r   rP  wmoderY   r   rE   rG  s	         rF   r   z+XMLConverter.receive_layout.<locals>.render  s   $'<KKTYY'KK@ 
 

1! "E5M";;*JJ|,!% *"5)*JJ}-

;'X W D&)9NNTYY'=  

1L K D&)9NNTYY'=  

1@  D'*BNNTYY'LLNF 
 

1r q D(+4;;IIx		2 

1! "E5M"

=)b a D*-

3htyy6IIJ! "E5M"

?+X W D),d$56/E5JJTYY'9 
 

1! "E5M"

>*> = D&)0 DMM* +))00		  

10

;'   D&)

.@A  D'*##/++88>DJJEt9djj$++>?  JJ<

DKK?XX
  7c;"566urG   r   rQ  s   ` @@rF   rW   zXMLConverter.receive_layout  s9    	V 	 	\	 \	D \	| 	vrG   c                 $    | j                          y rA   rS  r   s    rF   rT  zXMLConverter.closeM  rU  rG   )r   r   NNFrX  )r   r   r   r   compilerd  r*   r.   rR   r   r   r   r   r   rB   r   r  r   r   r   rW   rT  rq   rG   rF   r[  r[    s    bjj89G '+-1"#  	
  8$ k*  
.# $ s t mV m m^rG   r[  c                       e Zd ZdZ ej
                  d      Z	 	 	 	 ddedede	de
dee   d	efd
Zdede	fdZde	ddfdZddZddZde	ddfdZddZdeddfdZddZy)HOCRConverterzKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]Nr<   r   r   r=   r>   r\  c                 v    t         j                  | |||||       || _        d| _        | j	                          y )Nr   F)r   rB   r\  within_charsr  )rE   r<   r   r   r=   r>   r\  s          rF   rB   zHOCRConverter.__init__e  sB     	'5fx 	 	
 )!rG   rZ   r?   c                     |\  }}}}t        |      }t        | j                  d   |z
        }t        |      }t        | j                  d   |z
        }	d| d| d| d|	 S )N   zbbox  )r   	page_bbox)
rE   rZ   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1s
             rF   	bbox_reprzHOCRConverter.bbox_repru  sq    '+$ueUT^^A&./UT^^A&./vhaxq&::rG   r   c                     | j                   rE|j                  | j                         }t        t        | j                        j                  |       y t        t        | j                        j                  |       y rA   )r   r   r   r   r   r   r	   )rE   r   encoded_texts      rF   r   zHOCRConverter.write~  sM    ::;;tzz2L4::&,,\:$**40rG   c                 j   | j                   r| j                  d| j                   z         n| j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d	       y )
NzQ<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset='%s'>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
r`  r   s    rF   r  zHOCRConverter.write_header  s    ::JJ:<@JJG
 JJ- 	

:

&'

W	
 	

S	
 	

C	
 	

;

:rG   c                 H    | j                  d       | j                  d       y )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
rB  r   s    rF   r   zHOCRConverter.write_footer  s    

FG

V	
rG   c                 v    | j                   r| j                  j                  d|      }| j                  |       y rc  )r\  rd  re  r   r   s     rF   r   zHOCRConverter.write_text  s-    <<##B-D

4rG   c                 t   t        | j                        dkD  rd}d| j                  v rd}d| j                  v r|dz  }| j                  d| j                  | j                  || j                  | j                        | j                  | j                  | j                  j                         fz         d| _        y )	Nr   ro   Italiczfont-style: italic; Boldzfont-weight: bold; zg<span style='font:"%s"; font-size:%d; %s' class='ocrx_word' title='%s; x_font %s; x_fsize %d'>%s</span>F)	rS   working_textworking_fontr   working_sizer  working_bboxstriprr  )rE   bold_and_italic_styless     rF   
write_wordzHOCRConverter.write_word  s    t  !A%%'"4,,,)?&***&*??&JJ(
 )))).t'8'89))))))//1	  "rG   r   c                 6     dt         dd f fd |       y )Nrg   r?   c                 d   j                   r t        | t              rj                          t        | t              rm| j
                  _        j                  d| j                  dj                  | j
                        d       | D ]
  } |        j                  d       y t        | t              rNj                  dj                  | j
                        z         | D ]
  } |        j                  d       y t        | t              rZj                  d| j                  j                  | j
                        fz         | D ]
  } |        j                  d       y t        | t              rj                   sPd_         | j                         _        | j
                  _        | j"                  _        | j&                  _        y t+        | j                         j-                               d	k(  r0j                          j                  | j                                y j                   d
   | j
                  d
   k7  s2j$                  | j"                  k7  sj(                  | j&                  k7  rCj                          | j
                  _        | j"                  _        | j&                  _        xj                  | j                         z  c_        j                   d	   j                   d
   | j
                  d   j                   d   f_        y y )Nz<div class='ocr_page' id='z	' title='z'>
z</div>
z"<span class='ocr_line' title='%s'>r2  z+<div class='ocr_block' id='%d' title='%s'>
Tr   r   rx   rt  )rr  rT   r   r  r   rZ   rv  r   r   r  r%   r"   rN  r   r   r  r  r:  r  r0  r  rS   r  )rg   r   
child_liner   rE   s      rF   r   z,HOCRConverter.receive_layout.<locals>.render  sy     Zf%=!$'!%

{{DNN499$=? " "E5M"

:&D*-

8DNN499<UV #' 'J:&'

;'D),

Bzz4>>$))#<=> " "E5M"

:&D&)(((,D%(,D%(,		D%(,D%(,		D%4==?0023q8)

4==?3 !--a0DIIaL@#00DMMA#00DII= OO-04		D-04D-04		D-))T]]_<) --a0 --a0 IIaL --a0	-)- *rG   r   r   s   ` @rF   rW   zHOCRConverter.receive_layout  s     6	 6	D 6	p 	vrG   c                 $    | j                          y rA   rS  r   s    rF   rT  zHOCRConverter.close  s    rG   )utf8r   NFrX  )r   r   r   __doc__r   rn  rd  r*   r.   rR   r   r   r   r   rB   r1   r  r   r  r   r   r  r   rW   rT  rq   rG   rF   rp  rp  R  s    U  bjj9:G '+"#  	
  8$  ;d ;s ;1# 1$ 14
s t 
"29V 9 9vrG   rp  )Lr   loggingr   typingr   r   r   r   r   r   r	   r
   r   r   r   pdfminer.pdfcolorr   ro   r   imager   layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   	pdfdevicer&   pdffontr'   r(   	pdfinterpr)   r*   pdfpager+   pdftypesr,   pdfexceptionsr-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   	getLoggerr   r   r9   r   r   r   r   r   r[  rp  rq   rG   rF   <module>r     s+   	  	    ,   ; ;        %      %   $  ) :   ( K K "   g!M M`) & 
68U	3$gfo @9L' 9xoL' od	e<& ePmL' mrG   