1 Star 0 Fork 5

青林 / Scrpay

forked from 梁新斌 / Scrpay 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
zhihu.py 1.83 KB
一键复制 编辑 原始数据 按行查看 历史
梁新斌 提交于 2019-01-13 22:23 . 修改注释
import requests
from bs4 import BeautifulSoup
import tool
# Scrape data from the Zhihu website.
# Fetching Zhihu's hot topics requires a logged-in session; to be revisited later.
def get_url():
    """Return the URL of the Zhihu hot-list page that this script fetches."""
    return 'https://www.zhihu.com/hot'
def parse_html(db_conn, db_cur, url):
    """Fetch ``url`` with browser-like headers and print the raw response body.

    Args:
        db_conn: Database connection supplied by the caller. Not used yet;
            accepted so the signature stays stable for callers.
        db_cur: Database cursor supplied by the caller. Not used yet.
        url: Address of the page to download.

    Returns:
        None. The response text is written to stdout.
    """
    # SECURITY NOTE(review): the value below is a real session cookie committed
    # to source control. It should be revoked and loaded from configuration or
    # an environment variable instead of being hard-coded.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
        'Connection': 'close',
        # The HTTP request header is named "Cookie"; a header called "cookies"
        # is not treated as a cookie by the server, so the login was never sent.
        'Cookie': '_zap=7c81e1ab-7638-4d89-a8df-7f4f73d5d318; _xsrf=kBJPeG3JoKt8RyDfotQ5tumQD56YX6TX; d_c0="ABDil0kyuA6PTjVQ7IwneotUmWxXWnJH-rU=|1545659382"; capsion_ticket="2|1:0|10:1545659451|14:capsion_ticket|44:ZGZmZTBhNDBmYzJiNDY3ODgxYmUxMjViOWVmOTBiOTM=|08667171cafa1b1e4dc87a108af9cb4fafcdb1a5142070981ee56def83292c45"; z_c0="2|1:0|10:1545659477|4:z_c0|92:Mi4xZW1vbkRRQUFBQUFBSUNLQ1NUSzREaVlBQUFCZ0FsVk5WVElPWFFBSm1EN1JRUHZlUzFiYTdwcWJIY3haOXZxQjFR|ec049be6c582d5a0d25bf8c6df84b9182a25f15df13b47bec1bf2b0850e9fca1"; q_c1=d9dbf3826c314a8cb47c0ea608032745|1545659479000|1545659479000; __utmv=51854390.100--|2=registration_date=20181113=1^3=entry_date=20181113=1; tst=h; __utma=51854390.459793879.1545659575.1545659575.1547120931.2; __utmb=51854390.0.10.1547120931; __utmc=51854390; __utmz=51854390.1547120931.2.2.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/hot; tgw_l7_route=060f637cd101836814f6c53316f73463',
    }

    # Download the Zhihu hot-list page. The timeout keeps a stalled connection
    # from blocking the script indefinitely.
    html = requests.get(url=url, headers=headers, timeout=10).text

    # TODO: parse the hot list once authenticated fetching works. Planned steps:
    #   1. keep the Response object and check ``status_code == 200``
    #      (printing '数据获取失败' otherwise);
    #   2. soup = BeautifulSoup(response.text, 'lxml');
    #   3. hots = soup.find_all(attrs={'class': 'HotItem-content'}).
    print(html)
if __name__ == '__main__':
    # Script entry point: obtain a database connection and cursor from the
    # project's ``tool`` module, then fetch and print the hot-list page.
    db_conn = tool.get_connect()
    db_cur = tool.get_cursor(db_conn)
    url = get_url()
    parse_html(db_conn, db_cur, url)
Python
1
https://gitee.com/huapenghui/Scrpay.git
git@gitee.com:huapenghui/Scrpay.git
huapenghui
Scrpay
Scrpay
master

搜索帮助