1 Star 0 Fork 3

taote/Qwen

forked from qzl66/Qwen 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
utils.py 1.35 KB
一键复制 编辑 原始数据 按行查看 历史
import torch
from transformers import AutoModelForCausalLM
from accelerate import dispatch_model
def _device_map(num_gpus, num_layers):
per_gpu_layers = (num_layers + 2) / num_gpus
device_map = {
'transformer.wte': 0,
'transformer.ln_f': 0,
'lm_head': num_gpus-1
}
used = 1
gpu_target = 0
for i in range(num_layers):
if used >= per_gpu_layers:
gpu_target += 1
used = 0 if gpu_target < num_gpus-1 else 1
assert gpu_target < num_gpus
device_map[f'transformer.h.{i}'] = gpu_target
used += 1
return device_map
def load_model_on_gpus(model_name_or_path, num_gpus: int = 2):
    """Load a causal-LM checkpoint and place it on one or more GPUs.

    Args:
        model_name_or_path: Forwarded unchanged to
            ``AutoModelForCausalLM.from_pretrained``.
        num_gpus: Number of GPUs to use. With ``1`` the model is loaded
            with ``device_map='auto'``. With a value between 2 and
            ``torch.cuda.device_count()`` (inclusive) the model is loaded
            with ``device_map='cpu'`` and then dispatched using the map
            produced by ``_device_map``.

    Returns:
        The loaded model, after ``.eval()`` has been called on it (and, in
        the multi-GPU case, as returned by ``dispatch_model``).

    Raises:
        KeyError: If ``num_gpus`` is less than 1 or larger than the number
            of visible CUDA devices. The exception type is kept for
            backward compatibility; it now carries an explanatory message.
    """
    num_devices = torch.cuda.device_count()

    if num_gpus == 1:
        model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            device_map='auto',
            trust_remote_code=True,
        ).eval()
    elif 1 < num_gpus <= num_devices:
        model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            device_map='cpu',
            trust_remote_code=True,
        ).eval()
        num_layers = model.config.num_hidden_layers
        device_map = _device_map(num_gpus, num_layers)
        # Print the computed placement for the caller to see.
        print(device_map)
        model = dispatch_model(model, device_map=device_map)
    else:
        raise KeyError(
            f"num_gpus must be 1, or between 2 and the number of visible "
            f"CUDA devices ({num_devices}); got {num_gpus!r}"
        )

    return model
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/taote/Qwen.git
git@gitee.com:taote/Qwen.git
taote
Qwen
Qwen
main

搜索帮助