Dowemo
0 0 0 0

BeautifulSoup

  • Install
    • Open a command prompt (cmd)
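    • Run: pip3 install beautifulsoup4 (the examples below also import requests and use the lxml parser, so run: pip3 install requests lxml)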
  • Case one
import requests
from bs4 import BeautifulSoup


def get_html(url):
    """Download a page and return its text, or None on failure.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body when the server answers with HTTP 200;
        otherwise None (an error message is printed first).
    """
    try:
        # A timeout keeps the script from hanging forever on a dead server.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        print("网络访问出错")
        return None

    # Use the encoding detected from the body rather than the HTTP header,
    # which is often missing or wrong on Chinese-language sites.
    response.encoding = response.apparent_encoding
    if response.status_code == 200:
        return response.text

    print("网络访问出错")
    return None


def parse_html(html):
    """Parse the page and print a few elements to demonstrate the bs4 API.

    Args:
        html: HTML source text to parse.
    """
    soup = BeautifulSoup(html, 'lxml')

    # CSS selector: every <a> nested under the element with id "syncad_1".
    for text in soup.select('#syncad_1 a'):
        print(text)

    print(soup.title)          # the whole <title> tag
    print(soup.title.string)   # only the text inside <title>
    print(soup.p)              # the first <p> tag in the document
    print(soup.p.name)         # tag name of that element
    print(soup.a.parent.name)  # tag name of the first <a>'s parent


if __name__ == "__main__":
    url = "http://news.sina.com.cn"
    html = get_html(url)
    if html is not None:
        parse_html(html)
  • Case two
import requests
from bs4 import BeautifulSoup


def get_html(url):
    """Download a page and return its text, or None on failure.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body when the server answers with HTTP 200;
        otherwise None (an error message is printed first).
    """
    try:
        # A timeout keeps the script from hanging forever on a dead server.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        print("网络访问出错")
        return None

    # Use the encoding detected from the body rather than the HTTP header.
    response.encoding = response.apparent_encoding
    if response.status_code == 200:
        return response.text

    print("网络访问出错")
    return None


def parse_html(html):
    """Yield the child count of the first <div>, then each of its children.

    Args:
        html: HTML source text to parse.

    Yields:
        First the number of direct children of the first <div> in the
        document, then every direct child node in document order.
    """
    soup = BeautifulSoup(html, 'lxml')

    # .contents is a list of direct children; .children iterates the same
    # nodes lazily.
    yield len(soup.div.contents)
    for child in soup.div.children:
        yield child


if __name__ == "__main__":
    url = "http://music.baidu.com"
    html = get_html(url)
    if html is not None:
        for text in parse_html(html):
            print(text)



Copyright © 2011 Dowemo All rights reserved.    Creative Commons   AboutUs