
    $h7                       d Z ddlmZ ddlZddlZddlZddlmZm	Z	 erddl
mZ ej                  j                  d      Zej                  j                  e      Zej"                  j%                  e       eej&                  d<    ej(                  d      e_         ej(                  d	      e_        ej,                  e_         ej(                  d
ej0                        e_         ej(                  d      Z G d dej6                        Zy)a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownzhtml.parser
htmlparserz\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
z^([ ]*\n){2}c                       e Zd ZU dZd fdZ fdZ fdZedd       ZddZ	ddZ
ddZd d	Zd!d
Zd"dZd dZd#dZd#dZd!dZd!dZd!dZd!dZd$ fdZd$ fdZd%d& fdZdZded<   d'dZd$dZ xZS )(HTMLExtractorz
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    c                v    d|vrd|d<   t        dg      | _        dg| _        t        |   |i | || _        y )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cachesuper__init__md)selfr   argskwargs	__class__s       P/var/www/html/sandstorm/venv/lib/python3.12/site-packages/markdown/htmlparser.pyr   zHTMLExtractor.__init__S   sJ    V+).F%& tf+#$# 	$)&)    c                x    d| _         d| _        g | _        g | _        g | _        dg| _        t        |           y)z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr   r   resetr   r   s    r   r   zHTMLExtractor.reset`   s9    
 "
!##%#$#r   c                   t         |           t        | j                        rb| j                  r;| j
                  s/| j                  t        j                  | j                               n| j                  | j                         t        | j                        r_| j                  j                  | j                  j                  j                  dj                  | j                                     g | _	        yy)zHandle any buffered data. N)r   closelenrawdatar   
cdata_elemhandle_datar   unescaper   r   appendr   	htmlStashstorejoinr    s    r   r#   zHTMLExtractor.closek   s    t|| $$T__  !4!4T\\!BC  .t{{MM  !2!2!8!89M!NODK r   c                h   t        t        | j                        dz
  | j                  dz
        D ]e  }| j                  |   }| j                  j                  d|      }|dk(  rt        | j                        }| j                  j                  |dz          g | j                  | j                  dz
     S )zHReturns char index in `self.rawdata` for the start of the current line.    
)ranger$   r   linenor%   findr)   )r   iilast_line_start_poslf_poss       r   line_offsetzHTMLExtractor.line_offsetz   s     D334Q6AF 	5B"&"9"9""=\\&&t-@AF|T\\*##**6!84	5 &&t{{1}55r   c                    | j                   dk(  ry| j                   dkD  ry| j                  | j                  | j                  | j                   z    j                         dk(  S )z
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   T   Fr"   )offsetr%   r7   stripr   s    r   at_line_startzHTMLExtractor.at_line_start   sV     ;;!;;?||D,,T-=-=-KLRRTXZZZr   c                    | j                   | j                  z   }t        j                  j	                  | j
                  |      }|r| j
                  ||j                          S dj                  |      S )z
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)r7   r:   r   	endendtagsearchr%   endformat)r   tagstartms       r   get_endtag_textzHTMLExtractor.get_endtag_text   s_       4;;.  ''e<<<aeeg.. >>#&&r   c                *   || j                   v r| j                  ||       y | j                  j                  |      rJ| j                  s| j                         r.| j                  s"d| _        | j                  j                  d       | j                         }| j                  r7| j                  j                  |       | j                  j                  |       y | j                  j                  |       || j                  v r| j                          y y )NTr/   )r   handle_startendtagr   is_block_levelr   r=   r   r   r)   get_starttag_textr   r   CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   rC   attrstexts       r   handle_starttagzHTMLExtractor.handle_starttag   s    $//!##C/77!!#&DKKD<N<N<PY]YcYcDJMM  &%%'::JJc"KKt$MM  &d111%%' 2r   c                :   | j                  |      }| j                  rb| j                  j                  |       || j                  v r7| j                  r+| j                  j                         |k(  rn| j                  r+t        | j                        dk(  rt        j                  | j                  | j                  | j                  z   t        |      z   d        r| j                  j                  d       nd| _        d| _        | j                  j                  | j                  j                  j!                  dj#                  | j                                     | j                  j                  d       g | _        y y | j                  j                  |       y )Nr   r/   TFr"   

)rF   r   r   r)   r   popr$   blank_line_rematchr%   r7   r:   r   r   r   r*   r+   r,   )r   rC   rN   s      r   handle_endtagzHTMLExtractor.handle_endtag   s,   ##C(::KKt$djj jjzz~~'3. jj 4::!# &&t||D4D4Dt{{4RUXY]U^4^4_'`aKK&&t, #'DK"
$$TWW%6%6%<%<RWWT[[=Q%RS$$V,  $ MM  &r   c                    | j                   rd|v rd| _         | j                  r| j                  j                  |       y | j                  j                  |       y )Nr/   F)r   r   r   r)   r   r   datas     r   r'   zHTMLExtractor.handle_data   sA    ;;44<DK::KKt$MM  &r   c                   | j                   s| j                  r| j                  j                  |       y| j	                         r	|rt
        j                  | j                  | j                  | j                  z   t        |      z   d       r|dz  }nd| _        | j                  r| j                  d   nd}|j                  d      s,|j                  d      r| j                  j                  d       | j                  j                  | j                  j                  j                  |             | j                  j                  d       y| j                  j                  |       y)z Handle empty tags (`<data>`). Nr/   Tr0   r"   rQ   )r   r   r   r)   r=   rS   rT   r%   r7   r:   r$   r   endswithr   r*   r+   )r   rX   is_blockitems       r   handle_empty_tagzHTMLExtractor.handle_empty_tag   s    ::KKt$!h""4<<0@0@4;;0NQTUYQZ0Z0[#\] #(,4==$2D==(T]]4-@$$T*MM  !2!2!8!8!>?MM  (MM  &r   c                x    | j                  | j                         | j                  j                  |             y )Nr[   )r]   rJ   r   rI   )r   rC   rM   s      r   rH   z HTMLExtractor.handle_startendtag   s.    d446AWAWX[A\]r   c                H    | j                  dj                  |      d       y )Nz&#{};Fr_   r]   rB   r   names     r   handle_charrefzHTMLExtractor.handle_charref   s    gnnT2UCr   c                H    | j                  dj                  |      d       y )Nz&{};Fr_   ra   rb   s     r   handle_entityrefzHTMLExtractor.handle_entityref   s    fmmD1EBr   c                H    | j                  dj                  |      d       y )Nz	<!--{}-->Tr_   ra   rW   s     r   handle_commentzHTMLExtractor.handle_comment   s     k006Fr   c                H    | j                  dj                  |      d       y )Nz<!{}>Tr_   ra   rW   s     r   handle_declzHTMLExtractor.handle_decl   s    gnnT2TBr   c                H    | j                  dj                  |      d       y )Nz<?{}?>Tr_   ra   rW   s     r   	handle_pizHTMLExtractor.handle_pi  s    hood3dCr   c                t    |j                  d      rdnd}| j                  dj                  ||      d       y )NzCDATA[z]]>z]>z<![{}{}Tr_   )
startswithr]   rB   )r   rX   rA   s      r   unknown_declzHTMLExtractor.unknown_decl  s4    x0edi..tS9DIr   c                    | j                         s| j                  rt        |   |      S | j	                  d       |dz   S )Nz<?   )r=   r   r   parse_pir'   r   ir   s     r   rr   zHTMLExtractor.parse_pi  s>    4;;7#A&& 	1ur   c                    | j                         s| j                  rt        |   |      S | j	                  d       |dz   S )Nz<!rq   )r=   r   r   parse_html_declarationr'   rs   s     r   rv   z$HTMLExtractor.parse_html_declaration  s>    4;;71!44 	1ur   c                t    t         |   ||      }|dk(  ry| j                  | j                  || d       |S )Nr0   Fr_   )r   parse_bogus_commentr]   r%   )r   rt   reportposr   s       r   rx   z!HTMLExtractor.parse_bogus_comment  sC     g)!V4"9dll1S1EB
r   Nz
str | None_HTMLExtractor__starttag_textc                    | j                   S )z)Return full source of start tag: `<...>`.)r{   r<   s    r   rJ   zHTMLExtractor.get_starttag_text'  s    ###r   c                   d | _         | j                  |      }|dk  r|S | j                  }||| | _         g }t        j                  j                  ||dz         }|sJ d       |j                         }|j                  d      j                         x| _	        }||k  rt        j                  j                  ||      }|sn|j                  ddd      \  }	}
}|
sd }n,|d d dcxk(  r|dd  k(  sn |d d dcxk(  r|dd  k(  rn n|dd }|rt        j                  |      }|j                  |	j                         |f       |j                         }||k  r||| j                         }|d	vr| j                         \  }}d
| j                   v rP|| j                   j                  d
      z   }t!        | j                         | j                   j#                  d
      z
  }n|t!        | j                         z   }| j%                  |||        |S |j'                  d      r| j)                  ||       |S || j*                  v r| j-                  |       | j/                  ||       |S )Nr   r.   z#unexpected call to parse_starttag()rq   r9   'r0   ")>/>r/   r   )r{   check_for_whole_start_tagr%   r   tagfind_tolerantrT   rA   grouplowerlasttagattrfind_tolerantr(   r)   r;   getposcountr$   rfindr'   rZ   rH   rK   set_cdata_moderO   )r   rt   endposr%   rM   rT   krC   rE   attrnamerest	attrvaluerA   r2   r:   s                  r   parse_starttagzHTMLExtractor.parse_starttag+  s`   #//2A:M,,&q0 ++11'1Q3?;;;uIIK"[[^1133s&j,,227A>A()1a(8%HdI 	2A$8)BC.82A#7237%aO	&//	:	LL(..*I67A &j a%%'k!![[]NFFt+++$"6"6"<"<T"BBT112//55d;<  #d&:&:";;WQv./M<<##C/  d111##C(  e,r   )r   r   )returnint)r   bool)rC   strr   r   )rC   r   rM   zSequence[tuple[str, str]])rC   r   )rX   r   )rX   r   r[   r   )rc   r   )rt   r   r   r   )r   )rt   r   ry   r   r   r   )r   r   )__name__
__module____qualname____doc__r   r   r#   propertyr7   r=   rF   rO   rU   r'   r]   rH   rd   rf   rh   rj   rl   ro   rr   rv   rx   r{   __annotations__rJ   r   __classcell__)r   s   @r   r	   r	   J   s    	 
6 
6['(*'6''.^DCGCDJ #'OZ&$0r   r	   )r   
__future__r   reimportlib.util	importlibsystypingr   r   markdownr   util	find_specspecmodule_from_specr   loaderexec_modulemodulescompilepiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantrS   
HTMLParserr	    r   r   <module>r      s   ( # 	  
 *!
 ~~.^^,,T2
   
 #&L   RZZ'
 !rzz"?@
  #,,
 (2

 4 ZZ)
 %$ 

?+QJ)) Qr   