from bs4 import BeautifulSoup
# Works very well with the Requests or urllib modules.

# Quick Start
from bs4 import BeautifulSoup
soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
print(soup.prettify())
# OUTPUT:
<html>
 <body>
  <p>
   Some
   <b>
    bad
    <i>
     HTML
    </i>
   </b>
  </p>
 </body>
</html>

>>> soup.find(text="bad")
'bad'
>>> soup.i
<i>HTML</i>

# Another Example
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
>>> print(soup.prettify())

<?xml version="1.0" encoding="utf-8"?>
<tag1>
 Some
 <tag2/>
 bad
 <tag3>
  XML
 </tag3>
</tag1>
from bs4 import BeautifulSoup

with open("index.html") as fp:
    soup = BeautifulSoup(fp)

soup = BeautifulSoup("<html>a web page</html>")
