1 Star 0 Fork 0

lleonhardt / Artical_AcFun

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
comment.py 2.53 KB
一键复制 编辑 原始数据 按行查看 历史
lleonhardt 提交于 2016-12-28 12:09 . 更新 comment.py
__author__ ='maesleung'
# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
import codecs
import urllib.request
import urllib.parse
import re
import http.cookiejar
import os
import json
import codecs
def getHtml(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    A new cookie-aware opener carrying a browser-like ``User-Agent`` header
    and a fixed ``Cookie`` header is built on every call and installed as the
    process-wide default opener of ``urllib.request``.

    Args:
        url: Address to request.

    Returns:
        The complete response body as ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    cj = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    opener.addheaders = [
        ('User-Agent',
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'),
        ('Cookie', '2c7505bca2e54d1e85df92d947f2cc5a'),
    ]
    # Kept from the original: later plain urlopen() calls anywhere in the
    # process go through this opener as well.
    urllib.request.install_opener(opener)
    # Read inside a ``with`` block so the response (and its socket) is closed
    # even when read() raises, instead of waiting for garbage collection.
    with urllib.request.urlopen(url) as response:
        html_b = response.read()
    return html_b.decode('utf-8')
def _quote_chain(comments, start_id):
    """Return the ids reached from *start_id* by following ``quoteId`` links."""
    chain = []
    visited = set()
    current = start_id
    # Stop at the 0 terminator, and also when a link points back into the
    # chain; without the second check a cyclic quote would loop forever.
    while current != 0 and current not in visited:
        entry = comments.get('c' + str(current))
        if entry is None:
            # The quoted comment is not part of this payload: end the chain
            # here instead of failing with KeyError.
            break
        chain.append(current)
        visited.add(current)
        current = int(entry.get('quoteId') or 0)
    return chain


def anlysis_json(json_data):
    """Print the quote threads contained in one comment-list JSON payload.

    For every id in ``data.commentList`` the chain of quoted comments is
    built by following ``quoteId`` through ``data.commentContentArr`` (keys
    are ``'c' + id``) until a ``quoteId`` of 0. A chain is printed oldest
    quote first, one ``<floor>.<userName>:<content>`` line per comment and
    followed by ``print('\\n')``, but only if it contains at least one id
    that has not been printed as part of an earlier chain.

    Args:
        json_data: JSON text with the keys ``success`` and ``data``.

    Returns:
        None. Nothing is printed when ``success`` is falsy.
    """
    s = json.loads(json_data)
    if not s['success']:  # check the status flag of the response
        return
    data = s['data']
    comments = data['commentContentArr']
    # Normalise to int so the ids compare equal to the ints stored in the
    # chains regardless of whether the payload carries numbers or strings.
    comment_ids = [int(raw_id) for raw_id in data['commentList']]
    pending = set(comment_ids)  # ids not yet printed in any thread
    for comment_id in comment_ids:
        chain = _quote_chain(comments, comment_id)
        if pending.isdisjoint(chain):
            # Every comment of this chain already appeared in an earlier,
            # longer thread.
            continue
        chain.reverse()  # oldest quoted comment becomes floor 1
        pending.difference_update(chain)
        # Output format: 1.userName:content
        for floor, cid in enumerate(chain, start=1):
            entry = comments['c' + str(cid)]
            print(str(floor) + '.' +
                  str(entry['userName']) + ':' +
                  str(entry['content']))
        print('\n')
if __name__ == '__main__':
    # Script entry point: print every comment page of one content id.
    contentID = 3310482
    basic_url = 'http://www.acfun.tv/comment_list_json.aspx?contentId=%d&currentPage=%d'

    # Probe request (page 50) that is used only to read data.totalPage.
    probe_payload = json.loads(getHtml(basic_url % (contentID, 50)))
    totalPage = int(probe_payload['data']['totalPage'])

    # NOTE(review): pages are requested as 0 .. totalPage-1; if the endpoint
    # numbers pages from 1 the last page is never fetched - confirm.
    for page_no in range(0, totalPage):
        print('Pageeeee:%d' % (page_no))
        page_json = getHtml(basic_url % (contentID, page_no))
        anlysis_json(page_json)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/lleonhardt/Artical_AcFun.git
git@gitee.com:lleonhardt/Artical_AcFun.git
lleonhardt
Artical_AcFun
Artical_AcFun
master

搜索帮助

344bd9b3 5694891 D2dac590 5694891