1 Star 3 Fork 1

ZhongLeiDev / ImageDownloader-pyqt5

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
CoreUrlAnalyzeMethods.py 5.19 KB
一键复制 编辑 原始数据 按行查看 历史
ZhongLeiDev 提交于 2020-10-14 17:09 . commit base files.
import os
import re
import urllib.request
import random
import json
from DataBean import ImageBean
class UrlAnalyzer():
    """Turn a bcy.net post page into image records and download the images.

    ``getImgList`` fetches a post page, pulls the embedded image metadata
    out of the page source and returns one ``ImageBean`` per image.
    ``downloadImg`` saves a single image to disk, creating the target
    directory first via ``bcy_mkdir``.
    """

    # Example post URL:
    # bcyurl ="https://bcy.net/item/detail/6862228874235747332?_source_page=charts"
    # NOTE(review): the dots in this pattern are unescaped, so they match any
    # character; left unchanged because the attribute is public.
    bcy_url_pattern = re.compile(r'https://p\d-bcy.byteimg.com/img/banciyuan/.{32}~noop.image')
    bcy_album_pattern = re.compile(r'')
    # Spans from '"multi":[' up to the last '","wid":' on the same line of the
    # un-escaped page source ('.' does not cross newlines, '.*' is greedy).
    _image_json_pattern = re.compile(r'\"multi\":\[.*\",\"wid\":')
    # Pool of User-Agent strings; one is picked at random for every request
    # (a proxy list could be rotated the same way).
    ua_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    ]

    def _fetch(self, url):
        """Return the raw response body of *url*, sent with a random User-Agent."""
        request = urllib.request.Request(url)
        # add_header() adds (or overrides) a single HTTP request header.
        request.add_header('User-Agent', random.choice(self.ua_list))
        # The context manager closes the connection even if read() raises.
        with urllib.request.urlopen(request) as response:
            return response.read()

    @classmethod
    def _extract_image_data(cls, html):
        """Return the image metadata embedded in page source *html* as a dict."""
        # The metadata sits inside an escaped string: turn the escaped
        # '\\u002F' sequences into '/' and every '\"' into a plain '"'.
        text = html.replace('\\\\u002F', '/').replace('\\"', '"')
        match = cls._image_json_pattern.search(text)
        if match is None:
            # IndexError is what the previous ``findall(...)[0]`` raised;
            # the type is kept so existing handlers keep working.
            raise IndexError('no \'"multi":[...]\' image metadata found in page source')
        # Drop the trailing ',"wid":' key so the fragment closes cleanly,
        # then wrap it in braces to obtain a complete JSON object.
        fragment = match.group(0).replace(',"wid":', '')
        return json.loads('{' + fragment + '}')

    @staticmethod
    def _target_path(save_path, img_name):
        """Return the file path for *img_name* inside directory *save_path*."""
        # Strip like bcy_mkdir does so the file lands in the directory that
        # was actually created.
        directory = save_path.strip()
        if not directory or directory.endswith(("/", "\\")):
            # Already separator-terminated (or empty): plain concatenation,
            # exactly as before.
            return directory + img_name
        # No trailing separator: insert one instead of gluing the file name
        # onto the directory name.
        return os.path.join(directory, img_name)

    def getImgList(self, bcy_url):
        """Fetch a post page and return one ImageBean per image it embeds.

        Args:
            bcy_url: URL of the post page to analyse.

        Returns:
            A list of ``ImageBean`` objects, each filled with the album name
            (the ``work`` field), the image URL (``original_path``), a file
            name derived from the URL, and the width/height (``w``/``h``).

        Raises:
            IndexError: the page source contains no image metadata block.
            ValueError: the extracted fragment is not valid JSON
                (``json.JSONDecodeError``).
            KeyError: an expected field is missing from the metadata.

        Errors raised by ``urllib`` while fetching propagate unchanged.
        """
        print('开始解析链接', bcy_url, '...')
        html = self._fetch(bcy_url).decode("utf-8")
        imgdata = self._extract_image_data(html)
        album_name = imgdata['work']
        imglist = []
        for img in imgdata['multi']:
            path = img['original_path']
            # File name = last URL segment with the '~noop.image' suffix
            # swapped for '.jpg'.
            imgname = path.split('/')[-1].replace('~noop.image', '.jpg')
            img_bean = ImageBean()
            img_bean.setAlbum(album_name)
            img_bean.setImgUrl(path)
            img_bean.setImgName(imgname)
            img_bean.setImgWidth(img['w'])
            img_bean.setImgHeight(img['h'])
            imglist.append(img_bean)
        return imglist

    def downloadImg(self, img_url, img_name, save_path):
        """Download *img_url* and store it as *img_name* inside *save_path*.

        Args:
            img_url: URL of the image to download.
            img_name: File name to save the image under.
            save_path: Target directory; created if missing. It may be given
                with or without a trailing path separator.

        Errors raised by ``urllib`` or by file creation propagate unchanged.
        """
        self.bcy_mkdir(save_path)
        data = self._fetch(img_url)
        with open(self._target_path(save_path, img_name), "wb") as pic_file:
            pic_file.write(data)

    def bcy_mkdir(self, path):
        """Create directory *path* (including parents) if it does not exist.

        Surrounding whitespace and trailing backslashes are removed from
        *path* before it is used.

        Args:
            path: Directory to create.

        Returns:
            True if the directory was created by this call; False if the
            path already existed or is empty after normalisation.
        """
        # Strip surrounding whitespace, then any trailing '\' characters.
        path = path.strip()
        path = path.rstrip("\\")
        if not path or os.path.exists(path):
            return False
        # exist_ok tolerates another process creating the directory between
        # the existence check above and this call.
        os.makedirs(path, exist_ok=True)
        print(path + ' 创建成功')
        return True
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/zhongleidev/image-downloader-pyqt5.git
git@gitee.com:zhongleidev/image-downloader-pyqt5.git
zhongleidev
image-downloader-pyqt5
ImageDownloader-pyqt5
master

搜索帮助