1from html.parser import HTMLParser
2
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints every start tag, end tag and text run it is handed."""

    def _announce(self, label, value):
        # print() joins its arguments with one space, giving "<label> <value>".
        print(label, value)

    def handle_starttag(self, tag, attrs):
        """Print the name of an opening tag; ``attrs`` is accepted but not printed."""
        self._announce("Encountered a start tag:", tag)

    def handle_endtag(self, tag):
        """Print the name of a closing tag."""
        self._announce("Encountered an end tag :", tag)

    def handle_data(self, data):
        """Print a run of text data."""
        self._announce("Encountered some data :", data)
12
# Build a parser and push one complete, minimal document through it; each
# MyHTMLParser handler prints a line for the markup it is given.
parser = MyHTMLParser()
parser.feed('<html><head><title>Test</title></head>'
            '<body><h1>Parse me!</h1></body></html>')
# Signal end of input so anything the parser is still buffering is processed.
parser.close()
16
1from html.parser import HTMLParser
2from html.entities import name2codepoint
3
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints a labelled line for each parse event.

    Handles start tags (with their attributes), end tags, text data,
    comments, named entity references, numeric character references and
    declarations.
    """

    def handle_starttag(self, tag, attrs):
        """Print the tag name, then one line per entry in ``attrs``."""
        print("Start tag:", tag)
        for attr in attrs:
            print(" attr:", attr)

    def handle_endtag(self, tag):
        """Print the name of a closing tag."""
        print("End tag :", tag)

    def handle_data(self, data):
        """Print a run of text data."""
        print("Data :", data)

    def handle_comment(self, data):
        """Print the text of a comment."""
        print("Comment :", data)

    def handle_entityref(self, name):
        """Print the character for a named reference such as ``gt``.

        A name that is missing from ``name2codepoint`` is printed back
        literally as ``&name;`` instead of raising ``KeyError``.
        """
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            c = "&" + name + ";"
        else:
            c = chr(codepoint)
        print("Named ent:", c)

    def handle_charref(self, name):
        """Print the character for a numeric reference such as ``62`` or ``x3E``.

        The hexadecimal prefix is accepted in either case (``x`` or ``X``).
        A value that cannot be converted to a character prints U+FFFD.
        """
        try:
            if name.startswith(("x", "X")):
                c = chr(int(name[1:], 16))
            else:
                c = chr(int(name))
        except (ValueError, OverflowError):
            # chr() rejects code points outside range(0x110000); int() rejects
            # malformed digits. Neither should abort the whole parse.
            c = "\ufffd"
        print("Num ent :", c)

    def handle_decl(self, data):
        """Print the text of a declaration."""
        print("Decl :", data)
32
# Create a parser instance using the constructor's default arguments.
# NOTE(review): HTMLParser's documented default is convert_charrefs=True, in
# which case handle_entityref/handle_charref are presumably never invoked —
# confirm whether convert_charrefs=False was intended here.
parser = MyHTMLParser()
34