download.py
from __future__ import annotations

import os
import random
from typing import Callable

import requests
from urllib3 import disable_warnings
from urllib3.exceptions import InsecureRequestWarning

from const import downloads_dir
from log import color, logger
from util import get_now, human_readable_size, make_sure_dir_exists, show_progress

user_agent_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36",
}

# Globally disable SSL warnings
disable_warnings(InsecureRequestWarning)

progress_callback_func_type = Callable[[str, int, int, float], None]

# Default connection timeout
DOWNLOAD_CONNECT_TIMEOUT = 8

# Speed-test mode switch; when enabled, the download speeds of the different mirrors are compared
TEST_SPEED_MODE = False

MAX_TEST_SECONDS = 10
def download_file(
    url: str,
    download_dir=downloads_dir,
    filename="",
    connect_timeout=DOWNLOAD_CONNECT_TIMEOUT,
    extra_progress_callback: progress_callback_func_type | None = None,
    extra_info="",
) -> str:
    """
    Download the file at the given url to the given directory

    :param url: url of the file to download
    :param download_dir: directory to save the file to
    :param filename: file name to save as; if empty, the file name from the url is used
    :param connect_timeout: connection timeout in seconds
    :param extra_progress_callback: extra callback invoked on every progress update, e.g. to abort the download by raising an exception under certain conditions
    :return: absolute path of the downloaded file
    """
    download_dir = os.path.realpath(download_dir)
    filename = filename or os.path.basename(url)

    start_time = get_now()

    target_file_path = os.path.join(download_dir, filename)

    logger.info(f"start downloading {url} to {target_file_path} (connect timeout: {connect_timeout} seconds)")
    if extra_info != "":
        logger.info(extra_info)

    response = requests.get(url, stream=True, timeout=connect_timeout, headers=user_agent_headers, verify=False)
    if response.status_code != 200:
        raise Exception(f"download failed, status code {response.status_code}")

    make_sure_dir_exists(download_dir)

    check_cloudflare_100mb_test(response)

    with open(target_file_path, "wb") as f:
        total_length_optional = response.headers.get("content-length")

        if total_length_optional is None:  # no content length header
            f.write(response.content)
        else:
            dl = 0
            total_length = int(total_length_optional)
            for data in response.iter_content(chunk_size=4096):
                # filter out keep-alive new lines
                if not data:
                    continue

                f.write(data)

                dl += len(data)
                used_seconds = (get_now() - start_time).total_seconds()

                show_progress(filename, total_length, dl, used_seconds)
                if extra_progress_callback is not None:
                    extra_progress_callback(filename, total_length, dl, used_seconds)

                if TEST_SPEED_MODE and used_seconds >= MAX_TEST_SECONDS:
                    logger.warning(f"speed-test mode is enabled, each mirror is tried for at most {MAX_TEST_SECONDS} seconds to avoid the test taking too long")
                    return ""

            if dl > total_length:
                # If more bytes were written than the content-length header claims, gzip is usually the reason:
                # the transferred content is compressed, but requests decompresses it automatically, so the actual size is larger.
                # In that case the progress bar above has not printed a trailing newline, so print one here.
                print("")

    end_time = get_now()
    used_time = end_time - start_time

    actual_size = os.stat(target_file_path).st_size
    speed = actual_size / used_time.total_seconds()
    human_readable_speed = human_readable_size(speed)

    logger.info(color("bold_yellow") + f"download completed, took {used_time} ({human_readable_speed}/s)")

    return target_file_path
def check_cloudflare_100mb_test(response: requests.Response):
    # Some mirrors sit behind cloudflare and occasionally return its 100MB test file instead of the real asset
    content_type = response.headers.get("content-type")
    content_length = response.headers.get("content-length")
    server = response.headers.get("server")

    if content_type == "application/x-tar" and content_length == "104857600" and server == "cloudflare":
        raise Exception("the mirror returned cloudflare's interception file 100mb.test, skipping this mirror")


def extend_urls(current_urls: list[str], new_urls: list[str]):
    # Shuffle each batch of mirrors before appending, unless speed-test mode needs a stable order for comparison
    urls_to_add = [*new_urls]
    if not TEST_SPEED_MODE:
        random.shuffle(urls_to_add)

    current_urls.extend(urls_to_add)
def download_latest_github_release(
    download_dir=downloads_dir,
    asset_name="djc_helper.7z",
    owner="fzls",
    repo_name="djc_helper",
    connect_timeout=DOWNLOAD_CONNECT_TIMEOUT,
    extra_progress_callback: progress_callback_func_type | None = None,
    version="",
) -> str:
    """
    Download the given asset of a release of the given repository from github and its mirrors

    :param download_dir: download directory
    :param asset_name: name of the release asset
    :param owner: name of the repository owner
    :param repo_name: name of the repository
    :param connect_timeout: connection timeout in seconds
    :param extra_progress_callback: extra callback invoked on every progress update, e.g. to abort the download by raising an exception under certain conditions
    :param version: specific version number, e.g. 20.1.2; if not given, the latest release is downloaded
    :return: absolute path of the downloaded file
    """
    if TEST_SPEED_MODE:
        logger.warning("speed-test mode is enabled, shuffling is disabled and each mirror will be tried in turn for comparison")

    release_file_path = f"{owner}/{repo_name}/releases/latest/download/{asset_name}"
    if version != "":
        release_file_path = f"{owner}/{repo_name}/releases/download/v{version}/{asset_name}"

    # note: when measuring download speed manually, use IDM / Thunder (迅雷) or similar; do not test directly with chrome, the speed differs a lot
    urls: list[str] = []

    # First add the relatively fast mirrors
    extend_urls(
        urls,
        [
            # 8.4MiB/s
            f"https://gh.con.sh/https://github.com/{release_file_path}",
            # 8.2MiB/s
            f"https://dl.ghpig.top/https://github.com/{release_file_path}",
            # 3.4MiB/s
            f"https://download.yzuu.cf/{release_file_path}",
            # 2.5MiB/s
            f"https://download.fastgit.org/{release_file_path}",
            # 2.4MiB/s
            f"https://gh.ddlc.top/https://github.com/{release_file_path}",
            # 2.2MiB/s
            f"https://cors.isteed.cc/github.com/{release_file_path}",
            # 2.1MiB/s
            f"https://download.njuu.cf/{release_file_path}",
            # 2.1MiB/s
            f"https://download.nuaa.cf/{release_file_path}",
            # 926.3KiB/s
            f"https://ghps.cc/https://github.com/{release_file_path}",
        ],
    )

    # Then add a few slower mirrors
    extend_urls(
        urls,
        [
            # 375.0KiB/s
            f"https://hub.gitmirror.com/https://github.com/{release_file_path}",
            # 87.6KiB/s
            f"https://download.fgit.cf/{release_file_path}",
            # 27.0KiB/s
            f"https://ghproxy.net/https://github.com/{release_file_path}",
            # sometimes returns 100mb.test
            f"https://dl-slb.ghpig.top/https://github.com/{release_file_path}",
            # sometimes returns 100mb.test
            f"https://gh-proxy.com/https://github.com/{release_file_path}",
        ],
    )

    # Finally, as a fallback, add the origin site and some mirrors that may no longer work
    extend_urls(
        urls,
        [
            # 5.5KiB/s
            f"https://gh.h233.eu.org/https://github.com/{release_file_path}",
            # ConnectionResetError
            f"https://ghproxy.com/https://github.com/{release_file_path}",
            # timeout
            f"https://mirror.ghproxy.com/https://github.com/{release_file_path}",
            # timeout
            f"https://proxy.zyun.vip/https://github.com/{release_file_path}",
            # 181.5B/s
            f"https://github.com/{release_file_path}",
            # timeout
            f"https://kgithub.com/{release_file_path}",
            # 429
            f"https://gh.api.99988866.xyz/https://github.com/{release_file_path}",
            # 404
            f"https://js.xxooo.ml/https://github.com/{release_file_path}",
            # 404
            f"https://ghdl.feizhuqwq.cf/https://github.com/{release_file_path}",
            # 1.3KiB/s
            f"https://gh.gh2233.ml/https://github.com/{release_file_path}",
            # 404
            f"https://download.fastgit.ixmu.net/{release_file_path}",
        ],
    )

    if TEST_SPEED_MODE:
        logger.info(color("bold_cyan") + "The full mirror list is as follows:\n" + "\n".join(urls) + "\n")

    # Try each mirror in turn until one download succeeds
    for idx, url in enumerate(urls):
        try:
            mirror = extract_mirror_site(
                url,
                release_file_path,
                "https://github.com/",
            )
            log_mirror_status(idx, len(urls), mirror)

            file_path = download_file(
                url, download_dir, connect_timeout=connect_timeout, extra_progress_callback=extra_progress_callback
            )

            if not TEST_SPEED_MODE:
                return file_path
            else:
                logger.warning("\n")
        except BaseException as e:
            logger.error(f"{idx + 1}/{len(urls)}: download failed, exception: {e}, will try the next github mirror")
            logger.debug("detailed exception info", exc_info=e)
            continue

    raise Exception("all mirrors failed to download")
def download_github_raw_content(
    filepath_in_repo: str,
    download_dir=downloads_dir,
    owner="fzls",
    repo_name="djc_helper",
    branch_name="master",
    connect_timeout=DOWNLOAD_CONNECT_TIMEOUT,
) -> str:
    """
    Download the given file of the given branch of the given repository from github and its mirrors to a local directory

    :param filepath_in_repo: path of the file within the repository, e.g. docs/README.md
    :param download_dir: local directory to save to
    :param owner: name of the repository owner
    :param repo_name: name of the repository
    :param branch_name: branch name
    :param connect_timeout: connection timeout in seconds
    :return: absolute path of the downloaded file
    """
    if TEST_SPEED_MODE:
        logger.warning("speed-test mode is enabled, shuffling is disabled and each mirror will be tried in turn for comparison")

    urls: list[str] = []

    # First add the relatively fast mirrors
    extend_urls(
        urls,
        [
            # 521.4KiB/s
            f"https://github.moeyy.xyz/https://raw.githubusercontent.com/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # 556.3KiB/s
            f"https://raw.gitmirror.com/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # 233.7KiB/s
            f"https://raw.githubusercontents.com/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # 203.6KiB/s
            f"https://mirror.ghproxy.com/https://raw.githubusercontent.com/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
        ],
    )

    # Then add a few slower mirrors
    extend_urls(
        urls,
        [
            # 194.3KiB/s
            f"https://raw.fastgit.org/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # 163.3KiB/s
            f"https://cdn.jsdelivr.net/gh/{owner}/{repo_name}@{branch_name}/{filepath_in_repo}",
            # 96.1KiB/s
            f"https://jsdelivr.b-cdn.net/gh/{owner}/{repo_name}@{branch_name}/{filepath_in_repo}",
            # 71.1KiB/s
            f"https://raw.fgit.cf/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # 16.5KiB/s
            f"https://fastly.jsdelivr.net/gh/{owner}/{repo_name}@{branch_name}/{filepath_in_repo}",
            # 3.7KiB/s
            f"https://ghproxy.net/https://raw.githubusercontent.com/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # sometimes returns 100mb.test
            f"https://gh-proxy.com/https://raw.githubusercontent.com/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
        ],
    )

    # Then add the original address (sometimes unreachable)
    extend_urls(
        urls,
        [
            # timeout
            f"https://github.com/{owner}/{repo_name}/raw/{branch_name}/{filepath_in_repo}",
        ],
    )

    # Then add mirrors that may no longer work
    extend_urls(
        urls,
        [
            # aborted
            f"https://ghproxy.com/https://raw.githubusercontent.com/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # 404
            f"https://raw.iqiq.io/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # 404
            f"https://raw.fastgit.ixmu.net/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
            # 404
            f"https://raw.kgithub.com/{owner}/{repo_name}/{branch_name}/{filepath_in_repo}",
        ],
    )

    # Finally, as a last resort, add mirrors that may serve stale cached content
    extend_urls(
        urls,
        [
            # 112.0KiB/s
            f"https://gcore.jsdelivr.net/gh/{owner}/{repo_name}@{branch_name}/{filepath_in_repo}",
            # 163.8KiB/s
            f"https://jsd.cdn.zzko.cn/gh/{owner}/{repo_name}@{branch_name}/{filepath_in_repo}",
        ],
    )

    if TEST_SPEED_MODE:
        logger.info(color("bold_cyan") + "The full mirror list is as follows:\n" + "\n".join(urls) + "\n")

    # Try each mirror in turn until one download succeeds
    for idx, url in enumerate(urls):
        try:
            mirror = extract_mirror_site(
                url,
                "/" + owner,
                "/" + repo_name,
                "/" + branch_name,
                "@" + branch_name,
                "/" + filepath_in_repo,
                "https://github.com/",
                "https://raw.githubusercontent.com/",
            )
            log_mirror_status(idx, len(urls), mirror)

            file_path = download_file(url, download_dir, connect_timeout=connect_timeout)

            if not TEST_SPEED_MODE:
                return file_path
            else:
                logger.warning("\n")
        except BaseException as e:
            logger.error(f"{idx + 1}/{len(urls)}: download failed, exception: {e}, will try the next github mirror")
            logger.debug("detailed exception info", exc_info=e)
            continue

    raise Exception("all mirrors failed to download")
def log_mirror_status(current_index: int, total_count: int, mirror: str):
    logger.info(
        f"{current_index + 1}/{total_count}: trying mirror: {mirror}"
        + color("bold_yellow")
        + " (if it is slow, press ctrl + c to force switching to the next mirror)"
    )


def extract_mirror_site(mirror_download_url: str, *words_to_remove: str) -> str:
    # Strip the repo-specific parts from the download url, leaving only the mirror site for logging
    mirror_site = mirror_download_url
    for word in words_to_remove:
        mirror_site = mirror_site.replace(word, "")

    return mirror_site


if __name__ == "__main__":
    # download_latest_github_release()
    download_github_raw_content("djc_helper.py")