Python爬取豆瓣电影Top250

下面是我参考他人示例后改写的代码:

import requests
from bs4 import BeautifulSoup

def _tag_text(parent, name, css_class=None):
    """Return the text of the first matching descendant tag, or '' if none."""
    if css_class is None:
        match = parent.find(name)
    else:
        match = parent.find(name, css_class)
    # find() yields None when nothing matches; map that to an empty string so
    # one incomplete entry cannot abort the whole scrape.
    return '' if match is None else match.get_text()


def get_movies(pages=10):
    """Scrape the Douban Top 250 listing pages and collect four lists.

    Args:
        pages: Number of listing pages to request, starting with the first.
            Page ``i`` is requested with ``start=i*25``. Defaults to 10.

    Returns:
        A tuple ``(movie_list1, movie_list2, movie_list3, movie_list4)``:

        * ``movie_list1`` - text of the first ``span.title`` in each ``div.hd``
        * ``movie_list2`` - text of the first ``span.other`` in each ``div.hd``
        * ``movie_list3`` - text of the first ``p`` in each ``div.info``,
          with ASCII spaces removed
        * ``movie_list4`` - text of the ``span.rating_num`` in each
          ``div.star``

        A missing sub-tag contributes ``''`` instead of raising.
        NOTE(review): the lists are index-aligned only if every page holds
        the same number of ``div.hd``, ``div.info`` and ``div.star`` blocks.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
        'Host': 'movie.douban.com',
    }
    movie_list1 = []
    movie_list2 = []
    movie_list3 = []
    movie_list4 = []
    for i in range(pages):
        link = 'https://movie.douban.com/top250?start=' + str(i * 25)
        r = requests.get(link, headers=headers, timeout=10)
        # Progress output: page number followed by the HTTP status code.
        print(str(i + 1), '页响应状态码', r.status_code)

        soup = BeautifulSoup(r.text, 'lxml')
        # One pass over div.hd fills both title lists.
        for t in soup.find_all('div', 'hd'):
            movie_list1.append(_tag_text(t, 'span', 'title'))
            movie_list2.append(_tag_text(t, 'span', 'other'))
        for t in soup.find_all('div', 'info'):
            movie_list3.append(_tag_text(t, 'p').replace(' ', ''))
        for t in soup.find_all('div', 'star'):
            movie_list4.append(_tag_text(t, 'span', 'rating_num'))
    return movie_list1, movie_list2, movie_list3, movie_list4
# Scrape once at module level; main() reads these four lists as globals.
a, b, c, d = get_movies()
def main(output_path='c:\\Users\\lin paddy\\Desktop\\python\\Python爬虫\\豆瓣电影.txt',
         movies=None):
    """Write the scraped movie listing to a UTF-8 text file.

    The file starts with the header line ``豆瓣电影 Top 250``. Each entry
    follows as a ``Top<rank>`` line and a line joining title, alternate
    title, info text and rating, then a blank line.

    Args:
        output_path: Destination file. Defaults to the path this script
            originally wrote to.
        movies: Optional ``(names, others, infos, scores)`` tuple of
            parallel lists of strings. When omitted, the module-level
            lists ``a``, ``b``, ``c`` and ``d`` are used.
    """
    if movies is None:
        movies = (a, b, c, d)
    names, others, infos, scores = movies

    # newline='\n' disables newline translation, so the bytes on disk match
    # what the previous binary-mode codecs writer produced.
    with open(output_path, 'w', encoding='utf-8', newline='\n') as f:
        f.write('豆瓣电影 Top 250\n')
        # zip() stops at the shortest list, so a partial scrape is written
        # out as far as it goes instead of failing on a fixed count of 250.
        rows = zip(names, others, infos, scores)
        for rank, (name, other, info, score) in enumerate(rows, start=1):
            f.write('Top' + str(rank) + '\n')
            f.write('电影名:' + name + other + info + '豆瓣评分' + score)
            f.write('\n\n')

# Script entry point: write the output file only when run directly.
if __name__ == "__main__":
    main()

运行后即可得到如下图所示的结果: