htmllib.HTMLParser
class htmllib.HTMLParser(formatter, verbose=0)
This is the basic HTML parser class.
It supports all entity names required by the XHTML 1.0 Recommendation. It also defines handlers for all HTML 2.0 and many HTML 3.0 and 3.2 elements.
Methods
__init__(formatter[, verbose]) |
Creates an instance of the HTMLParser class. |
_convert_ref(match) |
|
_parse_doctype_attlist(i, declstartpos) |
|
_parse_doctype_element(i, declstartpos) |
|
_parse_doctype_entity(i, declstartpos) |
|
_parse_doctype_notation(i, declstartpos) |
|
_parse_doctype_subset(i, declstartpos) |
|
_scan_name(i, declstartpos) |
|
anchor_bgn(href, name, type) |
This method is called at the start of an anchor region. |
anchor_end() |
This method is called at the end of an anchor region. |
close() |
Handle the remaining data. |
convert_charref(name) |
Convert character reference, may be overridden. |
convert_codepoint(codepoint) |
|
convert_entityref(name) |
Convert entity references. |
ddpop([bl]) |
|
do_base(attrs) |
|
do_br(attrs) |
|
do_dd(attrs) |
|
do_dt(attrs) |
|
do_hr(attrs) |
|
do_img(attrs) |
|
do_isindex(attrs) |
|
do_li(attrs) |
|
do_link(attrs) |
|
do_meta(attrs) |
|
do_nextid(attrs) |
|
do_p(attrs) |
|
do_plaintext(attrs) |
|
end_a() |
|
end_address() |
|
end_b() |
|
end_blockquote() |
|
end_body() |
|
end_cite() |
|
end_code() |
|
end_dir() |
|
end_dl() |
|
end_em() |
|
end_h1() |
|
end_h2() |
|
end_h3() |
|
end_h4() |
|
end_h5() |
|
end_h6() |
|
end_head() |
|
end_html() |
|
end_i() |
|
end_kbd() |
|
end_listing() |
|
end_menu() |
|
end_ol() |
|
end_pre() |
|
end_samp() |
|
end_strong() |
|
end_title() |
|
end_tt() |
|
end_ul() |
|
end_var() |
|
end_xmp() |
|
error(message) |
|
feed(data) |
Feed some data to the parser. |
finish_endtag(tag) |
|
finish_shorttag(tag, data) |
|
finish_starttag(tag, attrs) |
|
get_starttag_text() |
|
getpos() |
Return current line number and offset. |
goahead(end) |
|
handle_charref(name) |
Handle character reference, no need to override. |
handle_comment(data) |
|
handle_data(data) |
|
handle_decl(decl) |
|
handle_endtag(tag, method) |
|
handle_entityref(name) |
Handle entity references, no need to override. |
handle_image(src, alt, *args) |
This method is called to handle images. |
handle_pi(data) |
|
handle_starttag(tag, method, attrs) |
|
parse_comment(i[, report]) |
|
parse_declaration(i) |
|
parse_endtag(i) |
|
parse_marked_section(i[, report]) |
|
parse_pi(i) |
|
parse_starttag(i) |
|
report_unbalanced(tag) |
|
reset() |
|
save_bgn() |
Begins saving character data in a buffer instead of sending it to the formatter object. |
save_end() |
Ends buffering character data and returns all data saved since the preceding call to the save_bgn() method. |
setliteral(*args) |
Enter literal mode (CDATA). |
setnomoretags() |
Enter literal mode (CDATA) till EOF. |
start_a(attrs) |
|
start_address(attrs) |
|
start_b(attrs) |
|
start_blockquote(attrs) |
|
start_body(attrs) |
|
start_cite(attrs) |
|
start_code(attrs) |
|
start_dir(attrs) |
|
start_dl(attrs) |
|
start_em(attrs) |
|
start_h1(attrs) |
|
start_h2(attrs) |
|
start_h3(attrs) |
|
start_h4(attrs) |
|
start_h5(attrs) |
|
start_h6(attrs) |
|
start_head(attrs) |
|
start_html(attrs) |
|
start_i(attrs) |
|
start_kbd(attrs) |
|
start_listing(attrs) |
|
start_menu(attrs) |
|
start_ol(attrs) |
|
start_pre(attrs) |
|
start_samp(attrs) |
|
start_strong(attrs) |
|
start_title(attrs) |
|
start_tt(attrs) |
|
start_ul(attrs) |
|
start_var(attrs) |
|
start_xmp(attrs) |
|
unknown_charref(ref) |
|
unknown_decl(data) |
|
unknown_endtag(tag) |
|
unknown_entityref(ref) |
|
unknown_starttag(tag, attrs) |
|
updatepos(i, j) |
Attributes
entity_or_charref |
|
entitydefs |