代码拉取完成,页面将自动刷新
import re
import requests
# session = None
# token = None
# Collected media URLs: get_course() appends one list of URLs per call
# (so this is a list of lists; the script entry point flattens it).
media_url = []
# URLs that check_resource() rejected or could not confirm with HTTP 200.
fail_media_url_list = []
def login():
    """Log in to the staging mobile API and return the session credentials.

    Posts the hard-coded test account to the login endpoint and reads
    ``sessionId`` and ``token`` from the ``serviceResponse`` object of the
    JSON reply.

    Returns:
        tuple: ``(session, token)`` as found in the reply.

    Raises:
        requests.RequestException: if the request fails or times out.
        ValueError: if the reply body is not valid JSON.
        KeyError: if the reply lacks ``serviceResponse``, ``sessionId`` or
            ``token``.
    """
    url = "http://staging.englishtown.com/services/api/mobile/service/login"
    data = {
        "serviceRequest": {
            "appVersion": "2.0.2",
            "password": "1",
            "platform": "Android",
            "productId": 4,
            "unifiedLogin": True,
            "userName": "stest10783"
        }
    }
    # requests waits indefinitely unless a timeout is given; bound the wait
    # so an unresponsive host cannot hang the script.
    result = requests.post(url=url, json=data, timeout=30)
    # Decode the body once instead of re-parsing it for each field.
    service_response = result.json()["serviceResponse"]
    return service_response["sessionId"], service_response["token"]
# Matches entries such as 'audioPath': 'http://...' in the repr() of the
# decoded response and captures only the URL.
_MEDIA_PATH_PATTERN = re.compile(
    r"""['"]\w+Path['"]:\s['"](https?://.*?)['"]""",
    re.IGNORECASE,
)
_MEDIA_SUFFIXES = (".mp3", ".mp4", ".jpg")


def _extract_media_urls(text):
    """Return the media URLs found in ``*Path`` entries of ``text``, in order."""
    return [
        found
        for found in _MEDIA_PATH_PATTERN.findall(text)
        if found.endswith(_MEDIA_SUFFIXES)
    ]


def get_course(session, token, id):
    """Fetch one activity's content and record the media URLs it references.

    Posts an ``activitycontent`` request for the given activity. When the
    server answers 200, every ``...Path`` value that is an http(s) URL ending
    in ``.mp3``, ``.mp4`` or ``.jpg`` is collected and the resulting list
    (possibly empty) is appended to the module-level ``media_url`` list.
    Any other status code is ignored.

    Args:
        session: Session id sent as ``sessionId``.
        token: Token sent as ``token``.
        id: Activity id sent as the only element of ``activities``. (The
            name shadows the builtin but is kept for existing callers.)

    Raises:
        requests.RequestException: if the request fails or times out.
        ValueError: if a 200 reply body is not valid JSON.
    """
    url = "http://staging.englishtown.com/services/api/mobile/service/activitycontent"
    data = {
        "serviceRequest": {
            "activities": [id],
            "countrycode": "cn",
            "partnercode": "Cool",
            "siteVersion": "development",
            "appVersion": "2.0.2",
            "culturecode": "zh-CN",
            "platform": "Android",
            "productId": 4,
            "sessionId": session,
            "token": token,
            "unifiedLogin": True
        }
    }
    # Bound the wait: requests has no default timeout.
    result = requests.post(url=url, json=data, timeout=30)
    if result.status_code != 200:
        return
    # The pattern is written against the Python repr of the decoded JSON
    # (single-quoted keys/values), so keep scanning str(result.json()).
    media_url.append(_extract_media_urls(str(result.json())))
# An http(s) URL whose path ends in a literal ".mp3", ".mp4" or ".jpg".
_CHECKABLE_URL_PATTERN = re.compile(
    r"^https?://.+\.(?:mp3|mp4|jpg)$", re.IGNORECASE
)


def check_resource(url):
    """Check that a media URL is reachable and record it if it is not.

    The URL must be an http(s) address ending in ``.mp3``, ``.mp4`` or
    ``.jpg`` (case-insensitive). Such a URL is probed with a HEAD request
    that does not follow redirects; anything other than status 200, or a
    request error, counts as a failure. Failed URLs are appended to the
    module-level ``fail_media_url_list``.

    Args:
        url: The URL string to verify.

    Returns:
        int: ``0`` when the resource answered 200, ``1`` otherwise.
    """
    if _CHECKABLE_URL_PATTERN.search(url) is None:
        fail_media_url_list.append(url)
        return 1
    try:
        # Bound the wait: requests has no default timeout.
        status = requests.head(url, allow_redirects=False, timeout=30).status_code
    except requests.RequestException:
        # Only network/HTTP-level errors mark the URL as failed; things like
        # KeyboardInterrupt must still propagate.
        fail_media_url_list.append(url)
        return 1
    if status != 200:
        fail_media_url_list.append(url)
        return 1
    return 0
# Activity ids (as digit strings) collected by get_activity_id().
activity_ids = []

_ACTIVITY_ID_PATTERN = re.compile(r'"activityId": (\d+)', re.IGNORECASE)


def get_activity_id(path="/Users/anderson/Downloads/lzero1.json"):
    """Collect activity ids from a JSON text file into ``activity_ids``.

    The file is scanned line by line; for each line, the first
    ``"activityId": <digits>`` occurrence (key matched case-insensitively)
    contributes its digits, as a string, to the module-level
    ``activity_ids`` list.

    Args:
        path: File to scan. Defaults to the location the script has always
            read from.

    Raises:
        OSError: if the file cannot be opened.
    """
    # JSON text is conventionally UTF-8; do not depend on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        # Iterate the file directly instead of loading every line up front.
        for line in f:
            found = _ACTIVITY_ID_PATTERN.search(line)
            if found:
                activity_ids.append(found.group(1))
def main():
    """Run the media check: load activity ids, fetch their media, probe each URL.

    Prints the number of media URLs found and the 20 most frequent URLs.
    Prints "please check your activity" when no activity ids or no media
    URLs were found. Failed URLs accumulate in ``fail_media_url_list``;
    this function does not print them.
    """
    from collections import Counter

    get_activity_id()
    if not activity_ids:
        # Nothing to query; stop before logging in.
        print("please check your activity")
        return

    session, token = login()
    for activity_id in activity_ids:
        get_course(session, token, int(activity_id))

    # media_url holds one list per activity; flatten it.
    urls = [found for batch in media_url for found in batch]
    print(len(urls))
    if not urls:
        print("please check your activity")
    for url in urls:
        check_resource(url)

    print(Counter(urls).most_common(20))


if __name__ == "__main__":
    main()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。