ó
5Þ	Pc           @   s  d  Z  d d l Z d d l Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z	 e j d	 ƒ Z
 e j d
 ƒ Z e j d ƒ Z e j d ƒ Z e j d e j ƒ Z e j d	 ƒ Z e j d ƒ Z d e f d „  ƒ  YZ d e j f d „  ƒ  YZ d S(   s   A parser for HTML and XHTML.iÿÿÿÿNs   [&<]s   <(/|\Z)s
   &[a-zA-Z#]s%   &([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)   &#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s	   <[a-zA-Z]t   >s   --\s*>s   [a-zA-Z][-.a-zA-Z0-9:_]*sJ   \s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?sê  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
s#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>t   HTMLParseErrorc           B   s#   e  Z d  Z d d „ Z d „  Z RS(   s&   Exception raised for all parse errors.c         C   s3   | s t  ‚ | |  _ | d |  _ | d |  _ d  S(   Ni    i   (   t   AssertionErrort   msgt   linenot   offset(   t   selfR   t   position(    (    s    /usr/lib/python2.7/HTMLParser.pyt   __init__4   s    	c         C   sW   |  j  } |  j d  k	 r, | d |  j } n  |  j d  k	 rS | d |  j d } n  | S(   Ns   , at line %ds   , column %di   (   R   R   t   NoneR   (   R   t   result(    (    s    /usr/lib/python2.7/HTMLParser.pyt   __str__:   s    	N(   NN(   t   __name__t
   __module__t   __doc__R	   R   R   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyR   1   s   t
   HTMLParserc           B   sø   e  Z d  Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z
 d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d Z d „  Z RS(   sÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    t   scriptt   stylec         C   s   |  j  ƒ  d S(   s#   Initialize and reset this instance.N(   t   reset(   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR   Z   s    c         C   s8   d |  _  d |  _ t |  _ d |  _ t j j |  ƒ d S(   s1   Reset this instance.  Loses all unprocessed data.t    s   ???N(	   t   rawdatat   lasttagt   interesting_normalt   interestingR	   t	   cdata_tagt
   markupbaset
   ParserBaseR   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR   ^   s
    				c         C   s!   |  j  | |  _  |  j d ƒ d S(   s‘   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        i    N(   R   t   goahead(   R   t   data(    (    s    /usr/lib/python2.7/HTMLParser.pyt   feedf   s    c         C   s   |  j  d ƒ d S(   s   Handle any buffered data.i   N(   R   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   closeo   s    c         C   s   t  | |  j ƒ  ƒ ‚ d  S(   N(   R   t   getpos(   R   t   message(    (    s    /usr/lib/python2.7/HTMLParser.pyt   errors   s    c         C   s   |  j  S(   s)   Return full source of start tag: '<...>'.(   t   _HTMLParser__starttag_text(   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   get_starttag_textx   s    c         C   s   t  |  _ | j ƒ  |  _ d  S(   N(   t   interesting_cdataR   t   lowerR   (   R   t   tag(    (    s    /usr/lib/python2.7/HTMLParser.pyt   set_cdata_mode|   s    	c         C   s   t  |  _ d  |  _ d  S(   N(   R   R   R	   R   (   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   clear_cdata_mode€   s    	c   
      C   sõ  |  j  } d } t | ƒ } x‹| | k  r¨|  j j | | ƒ } | rT | j ƒ  } n | } | | k  r} |  j | | | !ƒ n  |  j | | ƒ } | | k rŸ Pn  | j } | d | ƒ rÅt j	 | | ƒ rÛ |  j
 | ƒ } n¯ | d | ƒ rü |  j | ƒ } nŽ | d | ƒ r|  j | ƒ } nm | d | ƒ r>|  j | ƒ } nL | d | ƒ r_|  j | ƒ } n+ | d | k  r‰|  j d ƒ | d } n P| d k  r°| r¬|  j d ƒ n  Pn  |  j | | ƒ } q | d	 | ƒ rt j	 | | ƒ } | rP| j ƒ  d
 d !}	 |  j |	 ƒ | j ƒ  } | d | d ƒ s8| d } n  |  j | | ƒ } q q¥d | | k r‰|  j | d d
 !ƒ |  j | d
 ƒ } n  Pq | d | ƒ r–t j	 | | ƒ } | r| j d ƒ }	 |  j |	 ƒ | j ƒ  } | d | d ƒ sü| d } n  |  j | | ƒ } q n  t j	 | | ƒ } | r\| rX| j ƒ  | | k rX|  j d ƒ n  Pq¥| d | k  r’|  j d ƒ |  j | | d ƒ } q¥Pq d s t d ‚ q W| rä| | k  rä|  j | | | !ƒ |  j | | ƒ } n  | | |  _  d  S(   Ni    t   <s   </s   <!--s   <?s   <!i   s   EOF in middle of constructs   &#i   iÿÿÿÿt   ;t   &s#   EOF in middle of entity or char refs   interesting.search() lied(   R   t   lenR   t   searcht   startt   handle_datat	   updatepost
   startswitht   starttagopent   matcht   parse_starttagt   parse_endtagt   parse_commentt   parse_pit   parse_declarationR!   t   charreft   groupt   handle_charreft   endt	   entityreft   handle_entityreft
   incompleteR   (
   R   R<   R   t   it   nR3   t   jR1   t   kt   name(    (    s    /usr/lib/python2.7/HTMLParser.pyR   ‡   s    	  	c         C   s}   |  j  } | | | d !d k s) t d ‚ t j | | d ƒ } | sI d S| j ƒ  } |  j | | d | !ƒ | j ƒ  } | S(   Ni   s   <?s   unexpected call to parse_pi()iÿÿÿÿ(   R   R   t   picloseR-   R.   t	   handle_piR<   (   R   R@   R   R3   RB   (    (    s    /usr/lib/python2.7/HTMLParser.pyR7   Ú   s    	 c         C   s€  d  |  _ |  j | ƒ } | d k  r( | S|  j } | | | !|  _ g  } t j | | d ƒ } | sl t d ‚ | j ƒ  } | | d | !j ƒ  |  _	 } xå | | k  r}t
 j | | ƒ } | sÁ Pn  | j d d d ƒ \ }	 }
 } |
 sî d  } ng | d  d k o| d k n s6| d  d k o1| d k n rU| d d !} |  j | ƒ } n  | j |	 j ƒ  | f ƒ | j ƒ  } q™ W| | | !j ƒ  } | d k r+|  j ƒ  \ } } d |  j k rö| |  j j d ƒ } t |  j ƒ |  j j d ƒ } n | t |  j ƒ } |  j d | | | !d  f ƒ n  | j d
 ƒ rM|  j | | ƒ n/ |  j | | ƒ | |  j k r||  j | ƒ n  | S(   Ni    i   s#   unexpected call to parse_starttag()i   i   s   'iÿÿÿÿt   "R    s   />s   
s    junk characters in start tag: %ri   (   R    s   />(   R	   R"   t   check_for_whole_start_tagR   t   tagfindR3   R   R<   R%   R   t   attrfindR:   t   unescapet   appendt   stripR   t   countR,   t   rfindR!   t   endswitht   handle_startendtagt   handle_starttagt   CDATA_CONTENT_ELEMENTSR'   (   R   R@   t   endposR   t   attrsR3   RC   R&   t   mt   attrnamet   restt	   attrvalueR<   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR4   æ   sP    			$$	c         C   s  |  j  } t j | | ƒ } | rò | j ƒ  } | | | d !} | d k rR | d S| d k r² | j d | ƒ rx | d S| j d | ƒ rŽ d S|  j | | d ƒ |  j d ƒ n  | d k rÂ d S| d	 k rÒ d S|  j | | ƒ |  j d
 ƒ n  t d ƒ ‚ d  S(   Ni   R    t   /s   />i   iÿÿÿÿs   malformed empty start tagR   s6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZs   malformed start tags   we should not get here!(   R   t   locatestarttagendR3   R<   R1   R0   R!   R   (   R   R@   R   RV   RB   t   next(    (    s    /usr/lib/python2.7/HTMLParser.pyRH     s*    	c         C   sÿ   |  j  } | | | d !d k s) t d ‚ t j | | d ƒ } | sI d S| j ƒ  } t j | | ƒ } | sž |  j d  k	 r€ | S|  j	 d | | | !f ƒ n  | j
 d ƒ j ƒ  } |  j d  k	 rÞ | j ƒ  |  j k rÞ | Sn  |  j | j ƒ  ƒ |  j ƒ  | S(   Ni   s   </s   unexpected call to parse_endtagi   iÿÿÿÿs   bad end tag: %r(   R   R   t	   endendtagR-   R<   t
   endtagfindR3   R   R	   R!   R:   RM   R%   t   handle_endtagR(   (   R   R@   R   R3   RB   R&   (    (    s    /usr/lib/python2.7/HTMLParser.pyR5   7  s$    	   
c         C   s!   |  j  | | ƒ |  j | ƒ d  S(   N(   RR   R_   (   R   R&   RU   (    (    s    /usr/lib/python2.7/HTMLParser.pyRQ   M  s    c         C   s   d  S(   N(    (   R   R&   RU   (    (    s    /usr/lib/python2.7/HTMLParser.pyRR   R  s    c         C   s   d  S(   N(    (   R   R&   (    (    s    /usr/lib/python2.7/HTMLParser.pyR_   V  s    c         C   s   d  S(   N(    (   R   RD   (    (    s    /usr/lib/python2.7/HTMLParser.pyR;   Z  s    c         C   s   d  S(   N(    (   R   RD   (    (    s    /usr/lib/python2.7/HTMLParser.pyR>   ^  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyR/   b  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   handle_commentf  s    c         C   s   d  S(   N(    (   R   t   decl(    (    s    /usr/lib/python2.7/HTMLParser.pyt   handle_declj  s    c         C   s   d  S(   N(    (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyRF   n  s    c         C   s   |  j  d | f ƒ d  S(   Ns   unknown declaration: %r(   R!   (   R   R   (    (    s    /usr/lib/python2.7/HTMLParser.pyt   unknown_declq  s    c            s2   d | k r | S‡  f d †  } t  j d | | ƒ S(   NR+   c            s  |  j  ƒ  d }  yZ |  d d k ri |  d }  |  d d k rS t |  d d ƒ } n t |  ƒ } t | ƒ SWn t k
 r† d |  d SXd	 d  l } t j d  k ré i d
 d 6} t _ x0 | j j	 ƒ  D] \ } } t | ƒ | | <qÆ Wn  y ˆ  j |  SWn t
 k
 rd |  d SXd  S(   Ni    t   #i   t   xt   Xi   s   &#R*   iÿÿÿÿu   't   aposR+   (   Re   Rf   (   t   groupst   intt   unichrt
   ValueErrort   htmlentitydefsR   t
   entitydefsR	   t   name2codepointt	   iteritemst   KeyError(   t   st   cRl   Rm   RC   t   v(   R   (    s    /usr/lib/python2.7/HTMLParser.pyt   replaceEntitiesy  s&    
s#   &(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));(   t   ret   sub(   R   Rq   Rt   (    (   R   s    /usr/lib/python2.7/HTMLParser.pyRK   v  s    (   R   R   N(   R   R   R   RS   R   R   R   R   R!   R	   R"   R#   R'   R(   R   R7   R4   RH   R5   RQ   RR   R_   R;   R>   R/   R`   Rb   RF   Rc   Rm   RK   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyR   C   s8   										S		3												(   R   R   Ru   t   compileR   R$   R?   R=   R9   R2   RE   t   commentcloseRI   RJ   t   VERBOSER[   R]   R^   t	   ExceptionR   R   R   (    (    (    s    /usr/lib/python2.7/HTMLParser.pyt   <module>   s&   
	