From 93dab9a8e0787f575542ab2a80acd3f7ce227c18 Mon Sep 17 00:00:00 2001 From: fengliangjun Date: Wed, 27 Mar 2024 11:34:29 +0800 Subject: [PATCH] provide inference and evaluation --- README.md | 153 +++++++++--------- README_en.md | 145 ++++++++--------- .../evaluation_llama.py => evaluation.py | 42 ++--- examples/aquila/README.md | 12 +- examples/aquila/README_en.md | 12 +- .../aquila}/evaluate_aquila_7b_ptd.sh | 2 +- .../aquila}/generate_aquila_7b_ptd.sh | 2 +- examples/baichuan/README.md | 21 ++- examples/baichuan/README_en.md | 24 +-- .../baichuan}/evaluate_baichuan_13B_ptd.sh | 2 +- .../baichuan}/evaluate_baichuan_7B_ptd.sh | 2 +- .../baichuan}/generate_baichuan_13b_ptd.sh | 2 +- .../baichuan}/generate_baichuan_7b_ptd.sh | 2 +- examples/baichuan2/README.md | 22 +-- examples/baichuan2/README_en.md | 24 +-- .../baichuan2}/evaluate_baichuan2_13B_ptd.sh | 2 +- .../baichuan2}/evaluate_baichuan2_7B_ptd.sh | 2 +- .../baichuan2}/generate_baichuan2_13b_ptd.sh | 2 +- .../baichuan2}/generate_baichuan2_7b_ptd.sh | 2 +- examples/bloom/README.md | 24 +-- examples/bloom/README_en.md | 29 ++-- .../bloom}/evaluate_bloom_176b_ptd.sh | 2 +- .../bloom}/evaluate_bloom_7b_ptd.sh | 2 +- .../bloom}/generate_bloom_176b_ptd.sh | 2 +- .../bloom}/generate_bloom_7b_ptd.sh | 2 +- examples/intern/README.md | 12 +- examples/intern/README_en.md | 12 +- .../intern}/evaluate_internlm_7B_ptd.sh | 2 +- .../intern/generate_internlm_7b_ptd.sh | 2 +- examples/llama/README.md | 64 ++++---- examples/llama/README_en.md | 52 +++--- .../llama}/evaluate_llama_13B_ptd.sh | 2 +- .../llama}/evaluate_llama_33B_ptd.sh | 2 +- .../llama}/evaluate_llama_65B_ptd.sh | 2 +- .../llama}/evaluate_llama_7B_ptd.sh | 2 +- .../llama}/generate_llama_13b_lora_ptd.sh | 3 +- .../llama}/generate_llama_13b_ptd.sh | 2 +- .../llama}/generate_llama_33b_lora_ptd.sh | 3 +- .../llama}/generate_llama_33b_ptd.sh | 2 +- .../llama}/generate_llama_65b_lora_ptd.sh | 3 +- .../llama}/generate_llama_65b_ptd.sh | 3 +- .../llama}/generate_llama_7b_lora_ptd.sh | 3 +- .../llama}/generate_llama_7b_ptd.sh | 2 +- .../llama}/tune_llama_13b_ptd.sh | 0 .../llama}/tune_llama_33b_ptd.sh | 4 +- .../llama}/tune_llama_65b_ptd.sh | 0 .../llama}/tune_llama_7b_ptd.sh | 0 examples/llama2/README.md | 76 ++++----- examples/llama2/README_en.md | 72 ++++----- .../llama2}/evaluate_llama2_13B_ptd.sh | 2 +- .../llama2}/evaluate_llama2_34B_ptd.sh | 2 +- .../llama2}/evaluate_llama2_70B_ptd.sh | 2 +- .../llama2}/evaluate_llama2_7B_ptd.sh | 2 +- .../llama2}/generate_llama2_13b_lora_ptd.sh | 3 +- .../llama2}/generate_llama2_13b_ptd.sh | 2 +- .../llama2}/generate_llama2_34B_ptd.sh | 2 +- .../llama2}/generate_llama2_34b_lora_ptd.sh | 3 +- .../llama2}/generate_llama2_70b_lora_ptd.sh | 3 +- .../llama2}/generate_llama2_70b_ptd.sh | 2 +- .../llama2}/generate_llama2_7b_lora_ptd.sh | 3 +- .../llama2}/generate_llama2_7b_ptd.sh | 2 +- .../llama2}/tune_llama2_13b_ptd.sh | 4 +- .../llama2}/tune_llama2_34b_ptd.sh | 4 +- .../llama2}/tune_llama2_70b_ptd.sh | 0 .../llama2}/tune_llama2_7b_ptd.sh | 6 +- examples/mixtral/README.md | 19 +-- examples/mixtral/README_en.md | 18 +-- .../mixtral}/evaluate_mixtral_8x7b_ptd.sh | 2 +- .../mixtral}/generate_mixtral_8x7b_ptd.sh | 3 +- examples/qwen/README.md | 36 ++--- examples/qwen/README_en.md | 42 ++--- .../qwen}/evaluate_qwen_14b_ptd.sh | 2 +- .../qwen}/evaluate_qwen_72b_ptd.sh | 2 +- .../qwen}/evaluate_qwen_7b_ptd.sh | 2 +- .../qwen}/generate_qwen_14b_ptd.sh | 2 +- .../qwen}/generate_qwen_72b_ptd.sh | 2 +- .../qwen}/generate_qwen_7b_ptd.sh | 2 +- .../inference_alpaca.py => 
inference.py | 19 ++- modellink/checkpointing.py | 2 +- modellink/model/gpt_model.py | 2 +- modellink/model/transformer.py | 2 +- modellink/tasks/__init__.py | 14 ++ .../tasks/evaluation/__init__.py | 7 - .../tasks/evaluation/eval_api/__init__.py | 14 ++ .../tasks}/evaluation/eval_api/chat.py | 0 .../evaluation/eval_api/dataset_eval.py | 2 +- .../tasks/evaluation/eval_impl/__init__.py | 14 ++ .../tasks}/evaluation/eval_impl/agi_eval.py | 8 +- .../tasks}/evaluation/eval_impl/bbh_eval.py | 9 +- .../tasks}/evaluation/eval_impl/boolq_eval.py | 7 +- .../tasks}/evaluation/eval_impl/ceval_exam.py | 9 +- .../fewshot_template/AGI_fewshot.json | 0 .../fewshot_template/bbh_templatejson | 0 .../ceval_5shot_template.json | 0 .../gsm8k_3shot_template.json | 0 .../fewshot_template/mmlu_5shot_template.json | 0 .../tasks}/evaluation/eval_impl/gsm8k_eval.py | 9 +- .../tasks}/evaluation/eval_impl/human_eval.py | 12 +- .../tasks}/evaluation/eval_impl/mmlu_eval.py | 8 +- .../tasks/evaluation/eval_impl/template.py | 21 +++ modellink/tasks/evaluation/utils.py | 37 +++++ .../tasks/finetune}/__init__.py | 0 .../tasks/finetune/lora}/__init__.py | 0 .../tasks}/finetune/lora/utils.py | 0 modellink/tasks/inference/__init__.py | 14 ++ .../inference/text_generation/__init__.py | 0 .../inference/text_generation/beam_utils.py | 0 .../text_generation/communication.py | 2 +- .../inference/text_generation/forward_step.py | 5 +- .../inference/text_generation/generation.py | 2 +- .../inference/text_generation/infer_base.py | 6 +- .../inference/text_generation/module.py | 6 +- .../tasks}/inference/text_generation/utils.py | 8 +- modellink/training.py | 2 +- tasks/finetune/__init__.py | 0 tasks/finetune/lora/__init__.py | 0 tasks/inference/__init__.py | 0 tasks/inference/inference_llama.py | 51 ------ tasks/inference/inference_mixtral.py | 54 ------- .../test_convert_weight_from_huggingface.py | 2 +- .../test_convert_weight_from_huggingface.py | 2 +- .../test_convert_ckpt_from_huggingface.py | 2 +- .../test_convert_ckpt_from_huggingface.py | 2 +- .../test_convert_ckpt_from_huggingface.py | 2 +- .../ut/test_convert_ckpt_from_huggingface.py | 2 +- tools/checkpoint/{util.py => convert_ckpt.py} | 0 tools/checkpoint/convert_util.sh | 12 -- 127 files changed, 691 insertions(+), 714 deletions(-) rename tasks/evaluation/evaluation_llama.py => evaluation.py (81%) rename {tasks/evaluation => examples/aquila}/evaluate_aquila_7b_ptd.sh (92%) rename {tasks/inference => examples/aquila}/generate_aquila_7b_ptd.sh (95%) rename {tasks/evaluation => examples/baichuan}/evaluate_baichuan_13B_ptd.sh (94%) rename {tasks/evaluation => examples/baichuan}/evaluate_baichuan_7B_ptd.sh (94%) rename {tasks/inference => examples/baichuan}/generate_baichuan_13b_ptd.sh (94%) rename {tasks/inference => examples/baichuan}/generate_baichuan_7b_ptd.sh (94%) rename {tasks/evaluation => examples/baichuan2}/evaluate_baichuan2_13B_ptd.sh (95%) rename {tasks/evaluation => examples/baichuan2}/evaluate_baichuan2_7B_ptd.sh (94%) rename {tasks/inference => examples/baichuan2}/generate_baichuan2_13b_ptd.sh (94%) rename {tasks/inference => examples/baichuan2}/generate_baichuan2_7b_ptd.sh (94%) rename {tasks/evaluation => examples/bloom}/evaluate_bloom_176b_ptd.sh (94%) rename {tasks/evaluation => examples/bloom}/evaluate_bloom_7b_ptd.sh (94%) rename {tasks/inference => examples/bloom}/generate_bloom_176b_ptd.sh (94%) rename {tasks/inference => examples/bloom}/generate_bloom_7b_ptd.sh (92%) rename {tasks/evaluation => examples/intern}/evaluate_internlm_7B_ptd.sh (94%) rename 
tasks/inference/generate_lnternlm_7b_ptd.sh => examples/intern/generate_internlm_7b_ptd.sh (95%) rename {tasks/evaluation => examples/llama}/evaluate_llama_13B_ptd.sh (91%) rename {tasks/evaluation => examples/llama}/evaluate_llama_33B_ptd.sh (94%) rename {tasks/evaluation => examples/llama}/evaluate_llama_65B_ptd.sh (91%) rename {tasks/evaluation => examples/llama}/evaluate_llama_7B_ptd.sh (91%) rename {tasks/inference => examples/llama}/generate_llama_13b_lora_ptd.sh (94%) rename {tasks/inference => examples/llama}/generate_llama_13b_ptd.sh (94%) rename {tasks/inference => examples/llama}/generate_llama_33b_lora_ptd.sh (94%) rename {tasks/inference => examples/llama}/generate_llama_33b_ptd.sh (94%) rename {tasks/inference => examples/llama}/generate_llama_65b_lora_ptd.sh (94%) rename {tasks/inference => examples/llama}/generate_llama_65b_ptd.sh (93%) rename {tasks/inference => examples/llama}/generate_llama_7b_lora_ptd.sh (92%) rename {tasks/inference => examples/llama}/generate_llama_7b_ptd.sh (92%) rename {tasks/finetune => examples/llama}/tune_llama_13b_ptd.sh (100%) rename {tasks/finetune => examples/llama}/tune_llama_33b_ptd.sh (96%) rename {tasks/finetune => examples/llama}/tune_llama_65b_ptd.sh (100%) rename {tasks/finetune => examples/llama}/tune_llama_7b_ptd.sh (100%) rename {tasks/evaluation => examples/llama2}/evaluate_llama2_13B_ptd.sh (94%) rename {tasks/evaluation => examples/llama2}/evaluate_llama2_34B_ptd.sh (95%) rename {tasks/evaluation => examples/llama2}/evaluate_llama2_70B_ptd.sh (91%) rename {tasks/evaluation => examples/llama2}/evaluate_llama2_7B_ptd.sh (95%) rename {tasks/inference => examples/llama2}/generate_llama2_13b_lora_ptd.sh (94%) rename {tasks/inference => examples/llama2}/generate_llama2_13b_ptd.sh (94%) rename {tasks/inference => examples/llama2}/generate_llama2_34B_ptd.sh (95%) rename {tasks/inference => examples/llama2}/generate_llama2_34b_lora_ptd.sh (94%) rename {tasks/inference => examples/llama2}/generate_llama2_70b_lora_ptd.sh (94%) rename {tasks/inference => examples/llama2}/generate_llama2_70b_ptd.sh (94%) rename {tasks/inference => examples/llama2}/generate_llama2_7b_lora_ptd.sh (94%) rename {tasks/inference => examples/llama2}/generate_llama2_7b_ptd.sh (95%) rename {tasks/finetune => examples/llama2}/tune_llama2_13b_ptd.sh (96%) rename {tasks/finetune => examples/llama2}/tune_llama2_34b_ptd.sh (96%) rename {tasks/finetune => examples/llama2}/tune_llama2_70b_ptd.sh (100%) rename {tasks/finetune => examples/llama2}/tune_llama2_7b_ptd.sh (94%) rename {tasks/evaluation => examples/mixtral}/evaluate_mixtral_8x7b_ptd.sh (96%) rename {tasks/inference => examples/mixtral}/generate_mixtral_8x7b_ptd.sh (95%) rename {tasks/evaluation => examples/qwen}/evaluate_qwen_14b_ptd.sh (96%) rename {tasks/evaluation => examples/qwen}/evaluate_qwen_72b_ptd.sh (96%) rename {tasks/evaluation => examples/qwen}/evaluate_qwen_7b_ptd.sh (96%) rename {tasks/inference => examples/qwen}/generate_qwen_14b_ptd.sh (96%) rename {tasks/inference => examples/qwen}/generate_qwen_72b_ptd.sh (96%) rename {tasks/inference => examples/qwen}/generate_qwen_7b_ptd.sh (96%) rename tasks/inference/inference_alpaca.py => inference.py (68%) create mode 100644 modellink/tasks/__init__.py rename tasks/evaluation/eval_impl/template.py => modellink/tasks/evaluation/__init__.py (55%) create mode 100644 modellink/tasks/evaluation/eval_api/__init__.py rename {tasks => modellink/tasks}/evaluation/eval_api/chat.py (100%) rename {tasks => modellink/tasks}/evaluation/eval_api/dataset_eval.py (96%) 
create mode 100644 modellink/tasks/evaluation/eval_impl/__init__.py rename {tasks => modellink/tasks}/evaluation/eval_impl/agi_eval.py (95%) rename {tasks => modellink/tasks}/evaluation/eval_impl/bbh_eval.py (94%) rename {tasks => modellink/tasks}/evaluation/eval_impl/boolq_eval.py (95%) rename {tasks => modellink/tasks}/evaluation/eval_impl/ceval_exam.py (94%) rename {tasks => modellink/tasks}/evaluation/eval_impl/fewshot_template/AGI_fewshot.json (100%) rename {tasks => modellink/tasks}/evaluation/eval_impl/fewshot_template/bbh_templatejson (100%) rename {tasks => modellink/tasks}/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json (100%) rename {tasks => modellink/tasks}/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json (100%) rename {tasks => modellink/tasks}/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json (100%) rename {tasks => modellink/tasks}/evaluation/eval_impl/gsm8k_eval.py (95%) rename {tasks => modellink/tasks}/evaluation/eval_impl/human_eval.py (94%) rename {tasks => modellink/tasks}/evaluation/eval_impl/mmlu_eval.py (95%) create mode 100644 modellink/tasks/evaluation/eval_impl/template.py create mode 100644 modellink/tasks/evaluation/utils.py rename {tasks => modellink/tasks/finetune}/__init__.py (100%) rename {tasks/evaluation => modellink/tasks/finetune/lora}/__init__.py (100%) rename {tasks => modellink/tasks}/finetune/lora/utils.py (100%) create mode 100644 modellink/tasks/inference/__init__.py rename {tasks => modellink/tasks}/inference/text_generation/__init__.py (100%) rename {tasks => modellink/tasks}/inference/text_generation/beam_utils.py (100%) rename {tasks => modellink/tasks}/inference/text_generation/communication.py (99%) rename {tasks => modellink/tasks}/inference/text_generation/forward_step.py (98%) rename {tasks => modellink/tasks}/inference/text_generation/generation.py (99%) rename {tasks => modellink/tasks}/inference/text_generation/infer_base.py (98%) rename {tasks => modellink/tasks}/inference/text_generation/module.py (98%) rename {tasks => modellink/tasks}/inference/text_generation/utils.py (99%) delete mode 100644 tasks/finetune/__init__.py delete mode 100644 tasks/finetune/lora/__init__.py delete mode 100644 tasks/inference/__init__.py delete mode 100644 tasks/inference/inference_llama.py delete mode 100644 tasks/inference/inference_mixtral.py rename tools/checkpoint/{util.py => convert_ckpt.py} (100%) delete mode 100644 tools/checkpoint/convert_util.sh diff --git a/README.md b/README.md index 868df24bbd3..b19885c07fa 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,6 @@

- - Docs - GitHub @@ -56,61 +53,61 @@ ModelLink旨在为华为 [昇腾芯片](https://open.codehub.huawei.com/OpenBaiz Aquila 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json Baichuan 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 13B - lora - 对话 - 评估 + -- + 对话 + 评估 alpaca_data.json Baichuan2 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 13B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json Bloom 7B1 -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 176B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json InternLM 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json @@ -123,89 +120,89 @@ ModelLink旨在为华为 [昇腾芯片](https://open.codehub.huawei.com/OpenBaiz LLaMA 7B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 13B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 33B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 65B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json LLaMA2 7B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 13B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 34B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 70B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json Qwen 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 14B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 72B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json Mixtral 8x7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json @@ -630,7 +627,7 @@ ModelLink: 1. 如果你尝试使用 huggingface 的模型权重,请首先进行权重转换, 以 Llama-7B 为例: - PTD 策略的转换 ```bash - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -645,11 +642,11 @@ ModelLink: - 仅仅使用 PTD 策略训练的模型:在这种模式下,模型以 Megatron-LM 的风格被 流水并行 和 张量并行 切分 ```bash - sh tasks/inference/generate_llama_7b_ptd.sh + sh examples/llama/generate_llama_7b_ptd.sh ``` - 如果你仅仅使用 Lora, 可以参考: ```bash - sh tasks/inference/generate_llama_7b_lora_ptd.sh + sh examples/llama/generate_llama_7b_lora_ptd.sh ``` #### 使用手册 @@ -836,7 +833,7 @@ VOCAB_FILE=../models/llama7b-hf/ DATA_PATH="dataset/boolq/test" TASK="boolq" # 配置生成参数 -python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 512 \ @@ -857,13 +854,13 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ --micro-batch-size 1 \ --seed 42 | tee logs/train.log # 开启评估 -bash tasks/evaluation/evaluate_llama_7B_ptd.sh +bash examples/llama/evaluate_llama_7B_ptd.sh ``` 最重要的评估参数是 `--max-new-tokens`, 它表示模型输出的生成长度,比如,多项选择问题的输出长度就会明显比编码任务的输出长度小,该参数也很大程度上影响了模型的评估性能。通过--evaluation-batch-size参数可以设置多batch推理,提升模型评估性能。 ```bash -python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 512 \ @@ -899,7 +896,7 @@ HumanEval 是一个用于挑战代码生成问题的数据集,具有164个编 ##### BoolQ BoolQ 是一个 yes/no 的问答数据集, 每一个问题包含了一个(问题,文章,答案)三元组,同时有文章的标题作为额外的选择性输入。BoolQ 数据集的评估相对简单,只需要配置 `TASK="boolq"`, `--max-new-token=1`。 -零样本评估的结果通常会被给定的 prompt 影响,可以尝试通过在 `tasks/evaluation/evaluation.py` 中设置合适的 prompt 得到更高的分数, +零样本评估的结果通常会被给定的 prompt 影响,可以尝试通过在 `evaluation.py` 中设置合适的 prompt 得到更高的分数, ```bash # 通过修改 template 更新prompt @@ -920,44 +917,42 @@ GSM8K 是一个有8.5k高质量小学数学应用题文本的数据集,每一 在很多网站,MMLU 的精度会依据学科进行评估,57个学科主要属于四个大类, 因此该数据集也可以基于四个大类进行打分,[网站](https://github.com/hendrycks/test/blob/master/categories.py) 给出了具体的57个类别。 ### 使用加速特性 -ModelLink支持张量并行、流水线并行、序列并行、重计算、分布式优化器、等多种加速算法,下表为各种加速特性对应的使能开关: +ModelLink支持张量并行、流水线并行、序列并行、重计算、分布式优化器等多种加速算法和融合算子,下表为各种加速特性对应的使能开关: + +| 加速特性 | 使能参数 | 
+|:------------------------------------:|:------------------------------:| +| 张量并行 | --tensor-model-parallel-size | +| 流水线并行 | --pipeline-model-parallel-size | +| 流水并行动态划分 | --num-layer-list | +| 序列并行 | --sequence-parallel | +| 重计算 | --recompute-granularity | +| 分布式优化器 | --use-distributed-optimizer | +| DDP allreduce 掩盖 | --overlap-grad-reduce | +| Flash attention | --use-flash-attn | +| Fused rmsnorm | --use-fused-rmsnorm | +| Fused swiglu | --swiglu | +| Fused rotary
position embedding | --use-fused-rotary-pos-emb | -| 加速特性 | 使能参数 | -|:---------:|:--------:| -| 张量并行 | --tensor-model-parallel-size | -| 流水线并行 | --pipeline-model-parallel-size | -| 序列并行 | --sequence-parallel | -| 重计算 | --recompute-granularity | -| 分布式优化器 | --use-distributed-optimizer | ```bash torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ --tensor-model-parallel-size ${TP} \ --pipeline-model-parallel-size ${PP} \ + --num-layer-list 1,2,2,2,1 \ --sequence-parallel \ --recompute-granularity full \ --recompute-method block \ --recompute-num-layers 72 \ --use-distributed-optimizer \ - ... \ - ... -``` -在模型训练和下游任务中,可以使用--tensor-model-parallel-size和--pipeline-model-parallel-size将模型切分到不同的设备上,降低单设备内存占用。 -使用--sequence-parallel对数据进行切分,使得模型可以训练超长序列数据。recompute-granularity或者use-distributed-optimizer也能降低单设备内存占用。 - -ModelLink支持attn、rmsnorm、swiglu、rotary-pos-emb、mlp等多个融合算子,提升模型训练性能: -```bash -torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ --use-flash-attn \ --use-fused-rmsnorm \ - --use-fused-swiglu \ + --swiglu \ + --overlap-grad-reduce \ --use-fused-rotary-pos-emb \ - --mlp-layer-fusion \ ... \ ... ``` - ## 致谢 --- diff --git a/README_en.md b/README_en.md index a0b6b85c38d..b88c6b95c56 100644 --- a/README_en.md +++ b/README_en.md @@ -1,9 +1,6 @@

- - Docs - GitHub @@ -56,61 +53,61 @@ Current ModelLink supports pre-training and fine-tuning for the following models Aquila 7B -- - inference - evaluation + inference + evaluation alpaca_data.json Baichuan 7B -- - inference - evaluation + inference + evaluation alpaca_data.json 13B - lora - inference - evaluation + -- + inference + evaluation alpaca_data.json Baichuan2 7B -- - inference - evaluation + inference + evaluation alpaca_data.json 13B -- - inference - evaluation + inference + evaluation alpaca_data.json Bloom 7B1 -- - inference - evaluation + inference + evaluation alpaca_data.json 176B -- - inference - evaluation + inference + evaluation alpaca_data.json InternLM 7B -- - inference - evaluation + inference + evaluation alpaca_data.json @@ -123,89 +120,89 @@ Current ModelLink supports pre-training and fine-tuning for the following models LLaMA 7B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 13B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 33B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 65B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json LLaMA2 7B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 13B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 34B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 70B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json Qwen 7B -- - inference - evaluation + inference + evaluation alpaca_data.json 14B -- - inference - evaluation + inference + evaluation alpaca_data.json 72B -- - inference - evaluation + inference + evaluation alpaca_data.json Mixtral 8x7B -- - inference - evaluation + inference + evaluation alpaca_data.json @@ -644,7 +641,7 @@ Currently, we support the following four cases of inference: - PTD only ```bash - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -662,7 +659,7 @@ Currently, we support the following four cases of inference: ``` - **If you want to use lora model**, for details, refer to: ```bash - sh tasks/inference/generate_llama_7b_lora_ptd.sh + sh examples/llama/generate_llama_7b_lora_ptd.sh ``` #### Usage Guide @@ -848,7 +845,7 @@ VOCAB_FILE=../models/llama7b-hf/ DATA_PATH="dataset/boolq/test" TASK="boolq" # configure generation parameters -python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 512 \ @@ -868,7 +865,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ --micro-batch-size 1 \ --seed 42 | tee logs/train.log # start evaluation -bash tasks/evaluation/evaluate_llama_7B_ptd.sh +bash examples/llama/evaluate_llama_7B_ptd.sh ``` #### Task Introduction @@ -876,7 +873,7 @@ The most important evaluation parameters must be `--max-new-tokens`, which means questions' output length is obviously shorter than coding tasks. Besides, this parameter largely decides the speed of model generation. 
```bash -python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 512 \ @@ -929,13 +926,19 @@ you want to evaluate on test data, you should email your results to [C-Eval](htt ### Acceleration Features ModelLink supports various acceleration algorithms such as tensor parallelism, pipeline parallelism, sequence parallelism, recomputation, distributed optimizer, and more. The table below shows the enable switches corresponding to each acceleration feature: -| Acceleration Feature | Enable Parameter | -|:---------:|:--------:| -| Tensor Parallel | --tensor-model-parallel-size | -| Pipeline Parallel | --pipeline-model-parallel-size | -| Sequence Parallel | --sequence-parallel | -| Recomputation | --recompute-granularity | -| Distributed Optimizer | --use-distributed-optimizer | +| Acceleration Feature | Enable Parameter | +|:------------------------------------:|:------------------------------:| +| Tensor Parallel | --tensor-model-parallel-size | +| Pipeline Parallel | --pipeline-model-parallel-size | +| Dynamic division for PP | --num-layer-list | +| Sequence Parallel | --sequence-parallel | +| Recomputation | --recompute-granularity | +| Distributed Optimizer | --use-distributed-optimizer | +| overlap DDP allreduce | --overlap-grad-reduce | +| Flash attention | --use-flash-attn | +| Fused rmsnorm | --use-fused-rmsnorm | +| Fused swiglu | --swiglu | +| Fused rotary
position embedding | --use-fused-rotary-pos-emb | ```bash torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ @@ -946,25 +949,15 @@ torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ --recompute-method block \ --recompute-num-layers 72 \ --use-distributed-optimizer \ - ... \ - ... -``` -In model training and downstream tasks, you can use --tensor-model-parallel-size and --pipeline-model-parallel-size to distribute the model across different devices, reducing memory usage per device. -Using --sequence-parallel to split the data enables the model to train on extremely long sequences. The options recompute-granularity or use-distributed-optimizer can also reduce memory usage per device. - -ModelLink supports multiple fused operators such as attn, rmsnorm, swiglu, rotary-pos-emb, mlp, etc., to improve model training performance: -```bash -torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ --use-flash-attn \ --use-fused-rmsnorm \ - --use-fused-swiglu \ + --swiglu \ --use-fused-rotary-pos-emb \ - --mlp-layer-fusion \ + --overlap-grad-reduce \ ... \ ... ``` - ## Acknowledgments --- diff --git a/tasks/evaluation/evaluation_llama.py b/evaluation.py similarity index 81% rename from tasks/evaluation/evaluation_llama.py rename to evaluation.py index 95fb2f1b880..352767c076d 100644 --- a/tasks/evaluation/evaluation_llama.py +++ b/evaluation.py @@ -26,14 +26,16 @@ from megatron.initialize import initialize_megatron from megatron import get_args from megatron.model import GPTModel from megatron.arguments import core_transformer_config_from_args -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.boolq_eval import BoolqEval -from tasks.evaluation.eval_impl.gsm8k_eval import Gsm8kEval -from tasks.evaluation.eval_impl.mmlu_eval import MmluEval -from tasks.evaluation.eval_impl.ceval_exam import CEvalExam -from tasks.evaluation.eval_impl.bbh_eval import BBHEval -from tasks.evaluation.eval_impl.agi_eval import AGIEvalExam -from tasks.evaluation.eval_impl.human_eval import HumanEval + +from modellink.tasks.evaluation.utils import add_text_generate_args +from modellink.tasks.evaluation.eval_api.chat import Chat +from modellink.tasks.evaluation.eval_impl.boolq_eval import BoolqEval +from modellink.tasks.evaluation.eval_impl.gsm8k_eval import Gsm8kEval +from modellink.tasks.evaluation.eval_impl.mmlu_eval import MmluEval +from modellink.tasks.evaluation.eval_impl.ceval_exam import CEvalExam +from modellink.tasks.evaluation.eval_impl.bbh_eval import BBHEval +from modellink.tasks.evaluation.eval_impl.agi_eval import AGIEvalExam +from modellink.tasks.evaluation.eval_impl.human_eval import HumanEval sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) logging.getLogger().setLevel(logging.INFO) @@ -53,30 +55,6 @@ def model_provider(pre_process=True, post_process=True): return init_model -def add_text_generate_args(parser): - group = parser.add_argument_group(title='text generation') - group.add_argument("--task-data-path", - nargs='*', - default=[], - help='Path to the training dataset. 
Accepted format:' - '1) a single data path, 2) multiple datasets in the' - 'form: dataset1-path dataset2-path ...') - group.add_argument("--temperature", type=float, default=0.5, - help='Sampling temperature.') - group.add_argument("--evaluation-batch-size", type=int, default=1, - help='Size of evaluation batch') - group.add_argument("--greedy", action='store_true', default=False, - help='Use greedy sampling.') - group.add_argument("--top-p", type=float, default=0.9, - help='Top p sampling.') - group.add_argument("--top-k", type=int, default=0, - help='Top k sampling.') - group.add_argument("--max-new-tokens", type=int, default=128, - help='Size of the output generated text.') - group.add_argument("--task", nargs='*', default=[], help='Choose one task from mmlu, boolq and gsm8k') - return parser - - def get_result(result): if result: final_results = [] diff --git a/examples/aquila/README.md b/examples/aquila/README.md index 9875bafa03b..dfcbd2bb3d3 100644 --- a/examples/aquila/README.md +++ b/examples/aquila/README.md @@ -98,7 +98,7 @@ cd ModelLink/ mkdir model_weights # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --load-dir ./HF_Aquila7B_downloaded \ --save-dir ./model_weights/aquila \ @@ -114,7 +114,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -172,7 +172,7 @@ Aquila-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: 推理与预训练不同,我们必须加载预训练权重,请注意:在转换权重时使用的模型结构参数,和运行评估任务时使用的模型结构参数,应保持一致。 -权重转换完成后,我们配置Aquila-7B推理脚本`tasks/inference/generate_aquila_7b_ptd.sh`,需要正确指定加载权重的路径,词表路径等(下面样例仅供参考) +权重转换完成后,我们配置Aquila-7B推理脚本`example/aquila/generate_aquila_7b_ptd.sh`,需要正确指定加载权重的路径,词表路径等(下面样例仅供参考) ```shell # 请按实际情况修改模型权重路径和分词器路径 @@ -182,7 +182,7 @@ TOKENIZER_PATH="./HF_Aquila7B_downloaded/" 启动Aquila-7B推理: ```shell -bash ./tasks/inference/generate_aquila_7b_ptd.sh +bash examples/aquila/generate_aquila_7b_ptd.sh ``` 部分推理样本如下: @@ -198,7 +198,7 @@ Aquila-7B: 评估与推理类似,也必须加载转换后的权重,请注意:在转换权重时使用的模型结构参数,和运行评估任务时使用的模型结构参数,应保持一致。 -权重转换完成后,我们配置Aquila-7B评估脚本 `tasks/evaluation/evaluate_aquila_7b_ptd.sh`,需要正确指定加载权重的路径,词表路径,评估数据的路径,以及评估任务的名字等(下面样例仅供参考): +权重转换完成后,我们配置Aquila-7B评估脚本 `examples/aquila/evaluate_aquila_7b_ptd.sh`,需要正确指定加载权重的路径,词表路径,评估数据的路径,以及评估任务的名字等(下面样例仅供参考): ```shell CKPT_LOAD_DIR="./model_weights/aquila/" @@ -209,7 +209,7 @@ TASK="boolq" 启动Aquila-7B评估 ```shell -bash tasks/evaluation/evaluate_aquila_7b_ptd.sh +bash examples/aquila/evaluate_aquila_7b_ptd.sh ``` Aquila-7B在**Ascend NPU**中的评测表现: diff --git a/examples/aquila/README_en.md b/examples/aquila/README_en.md index 3b1b5479f0f..3afc181527c 100644 --- a/examples/aquila/README_en.md +++ b/examples/aquila/README_en.md @@ -97,7 +97,7 @@ mkdir model_weights # please modify the path to set_env.sh based on your environment. 
source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --load-dir ./HF_Aquila7B_downloaded \ --save-dir ./model_weights/aquila \ @@ -113,7 +113,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -166,7 +166,7 @@ The performance of Aquila-7B in Ascend NPU and reference device: We support AscendSpeed Inference for text generation with Aquila 7B model. -Inference is different from pre-training because it requires loading the pre-trained model weights. Therefore, we need to complete the aforementioned model weight conversion task first, then configure the Aquila-7B Inference shell script `tasks/inference/generate_aquila_7b_ptd.sh`. "CKPT_LOAD_DIR" must point to the converted weights directory, and "TOKENIZER_PATH" must point to the directory which contains Aquila vocabulary files -- in our example, it is "./HF_Aquila7B_downloaded". In your operation, please fill in correct value based on your actual scenario. +Inference is different from pre-training because it requires loading the pre-trained model weights. Therefore, we need to complete the aforementioned model weight conversion task first, then configure the Aquila-7B Inference shell script `examples/aquila/generate_aquila_7b_ptd.sh`. "CKPT_LOAD_DIR" must point to the converted weights directory, and "TOKENIZER_PATH" must point to the directory which contains Aquila vocabulary files -- in our example, it is "./HF_Aquila7B_downloaded". In your operation, please fill in correct value based on your actual scenario. ```shell # please change to actual values @@ -176,7 +176,7 @@ TOKENIZER_PATH="./HF_Aquila7B_downloaded/" Start Aquila-7B Inference: ```shell -bash ./tasks/inference/generate_aquila_7b_ptd.sh +bash ./examples/aquila/generate_aquila_7b_ptd.sh ``` Sample results of Aquila-7B Inference: @@ -190,7 +190,7 @@ We use BoolQ benchmark to evaluate our model. You can [go to the BoolQ Benchmark Evaluation task is similar to inference task too,it also requires loading the pre-trained model weights. Please note that the model structure parameters used in converting weights should be consistent with those used in running the evaluation task. -After weight conversion is complete, we configure the Aquila-7B evaluation script `tasks/evaluation/evaluate_aquila_7b_ptd.sh`. We need to correctly specify the path to load weights, the path to tokenizer and vocab, and so on (the following example is for reference only) +After weight conversion is complete, we configure the Aquila-7B evaluation script `examples/aquila/evaluate_aquila_7b_ptd.sh`. 
We need to correctly specify the path to load weights, the path to tokenizer and vocab, and so on (the following example is for reference only) ```shell CKPT_LOAD_DIR="./model_weights/aquila/" @@ -201,7 +201,7 @@ After weight conversion is complete, we configure the Aquila-7B evaluation scrip Start evaluation task ```shell -bash ./tasks/evaluation/evaluate_aquila_7b_ptd.sh +bash ./examples/aquila/evaluate_aquila_7b_ptd.sh ``` Sample Aquila-7B performance running in **Ascend NPU**: diff --git a/tasks/evaluation/evaluate_aquila_7b_ptd.sh b/examples/aquila/evaluate_aquila_7b_ptd.sh similarity index 92% rename from tasks/evaluation/evaluate_aquila_7b_ptd.sh rename to examples/aquila/evaluate_aquila_7b_ptd.sh index d4ae5d89121..e46c9b87a23 100644 --- a/tasks/evaluation/evaluate_aquila_7b_ptd.sh +++ b/examples/aquila/evaluate_aquila_7b_ptd.sh @@ -26,7 +26,7 @@ DISTRIBUTED_ARGS=" " # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $EVAL_DATA_PATH \ --task $TASK\ --tensor-model-parallel-size ${TP} \ diff --git a/tasks/inference/generate_aquila_7b_ptd.sh b/examples/aquila/generate_aquila_7b_ptd.sh similarity index 95% rename from tasks/inference/generate_aquila_7b_ptd.sh rename to examples/aquila/generate_aquila_7b_ptd.sh index 906eb2c22fd..cd17e500296 100644 --- a/tasks/inference/generate_aquila_7b_ptd.sh +++ b/examples/aquila/generate_aquila_7b_ptd.sh @@ -23,7 +23,7 @@ DISTRIBUTED_ARGS=" --master_port $MASTER_PORT " -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/inference/inference.py \ --tensor-model-parallel-size ${TP} \ --pipeline-model-parallel-size ${PP} \ --num-layers 32 \ diff --git a/examples/baichuan/README.md b/examples/baichuan/README.md index a2e58067fe6..be0f5272a7d 100644 --- a/examples/baichuan/README.md +++ b/examples/baichuan/README.md @@ -22,7 +22,6 @@ - [脚本](#脚本) - [性能](#性能) - [吞吐](#吞吐) - - [Lora微调](#Lora微调) - [推理](#推理) - [评估](#评估) @@ -103,7 +102,7 @@ mkdir baichuan-7B-mt # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -120,7 +119,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -188,7 +187,7 @@ Baichuan-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -首先需要配置baichuan-7B的推理脚本: tasks/inference/generate_baichuan_7b_ptd.sh +首先需要配置baichuan-7B的推理脚本: examples/baichuan/generate_baichuan_7b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -202,7 +201,7 @@ TOKENIZER_PATH="your tokenizer directory path" 然后可直接启动generate_baichuan_7b_ptd.sh ```bash -bash tasks/inference/generate_baichuan_7b_ptd.sh +bash examples/baichuan/generate_baichuan_7b_ptd.sh ``` 推理的示例如下: @@ -224,7 +223,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan_7B_ptd.sh +bash ./examples/baichuan/evaluate_baichuan_7B_ptd.sh ``` @@ -337,7 +336,7 @@ mkdir baichuan-13B-mt # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh 
-python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -355,7 +354,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -422,7 +421,7 @@ Baichuan-13B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -配置baichuan-13B的推理脚本: tasks/inference/generate_baichuan_13b_ptd.sh +配置baichuan-13B的推理脚本: examples/baichuan/generate_baichuan_13b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -436,7 +435,7 @@ TOKENIZER_PATH="your tokenizer directory path" 然后可直接启动generate_baichuan_13b_ptd.sh ```bash -bash tasks/inference/generate_baichuan_13b_ptd.sh +bash examples/baichuan/generate_baichuan_13b_ptd.sh ``` 推理的示例如下: @@ -457,7 +456,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan_13B_ptd.sh +bash ./examples/baichuan/evaluate_baichuan_13B_ptd.sh ```
diff --git a/examples/baichuan/README_en.md b/examples/baichuan/README_en.md index 85038acc51d..2c0eac050d0 100644 --- a/examples/baichuan/README_en.md +++ b/examples/baichuan/README_en.md @@ -101,7 +101,7 @@ mkdir baichuan-7B-mt # modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -117,7 +117,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -184,7 +184,7 @@ The performance of Baichuan-7B in **Ascend NPU** and **Reference**: ## Inference -Config baichuan-7B inference script: tasks/inference/generate_baichuan_7b_ptd.sh +Config baichuan-7B inference script: examples/baichuan/generate_baichuan_7b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -193,9 +193,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch baichuan-7B inference script: tasks/inference/generate_baichuan_7b_ptd.sh +Launch baichuan-7B inference script: examples/baichuan/generate_baichuan_7b_ptd.sh ```bash -bash tasks/inference/generate_baichuan_7b_ptd.sh +bash examples/baichuan/generate_baichuan_7b_ptd.sh ``` Some inference samples are as follows: @@ -217,7 +217,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan_7B_ptd.sh +bash ./examples/baichuan/evaluate_baichuan_7B_ptd.sh ```
@@ -330,7 +330,7 @@ mkdir baichuan-13B-mt # modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -348,7 +348,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -412,7 +412,7 @@ The performance of the Baichuan-13B in **Ascend NPU** and **Reference**: ## Inference -Config baichuan-13B inference script: tasks/inference/generate_baichuan_13b_ptd.sh +Config baichuan-13B inference script: examples/baichuan/generate_baichuan_13b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -421,9 +421,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch baichuan-13B inference script: tasks/inference/generate_baichuan_13b_ptd.sh +Launch baichuan-13B inference script: examples/baichuan/generate_baichuan_13b_ptd.sh ```bash -bash tasks/inference/generate_baichuan_13b_ptd.sh +bash examples/baichuan/generate_baichuan_13b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/baichuan/baichuan_13B_inference.png) @@ -444,7 +444,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan_13B_ptd.sh +bash ./examples/baichuan/evaluate_baichuan_13B_ptd.sh ```
diff --git a/tasks/evaluation/evaluate_baichuan_13B_ptd.sh b/examples/baichuan/evaluate_baichuan_13B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_baichuan_13B_ptd.sh rename to examples/baichuan/evaluate_baichuan_13B_ptd.sh index e7e9a845dd3..adb46c0083f 100644 --- a/tasks/evaluation/evaluate_baichuan_13B_ptd.sh +++ b/examples/baichuan/evaluate_baichuan_13B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK \ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_baichuan_7B_ptd.sh b/examples/baichuan/evaluate_baichuan_7B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_baichuan_7B_ptd.sh rename to examples/baichuan/evaluate_baichuan_7B_ptd.sh index 3db1076b712..f0e11b37c08 100644 --- a/tasks/evaluation/evaluate_baichuan_7B_ptd.sh +++ b/examples/baichuan/evaluate_baichuan_7B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK \ --seq-length 2048 \ diff --git a/tasks/inference/generate_baichuan_13b_ptd.sh b/examples/baichuan/generate_baichuan_13b_ptd.sh similarity index 94% rename from tasks/inference/generate_baichuan_13b_ptd.sh rename to examples/baichuan/generate_baichuan_13b_ptd.sh index f10f08b06da..a5412705836 100644 --- a/tasks/inference/generate_baichuan_13b_ptd.sh +++ b/examples/baichuan/generate_baichuan_13b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ diff --git a/tasks/inference/generate_baichuan_7b_ptd.sh b/examples/baichuan/generate_baichuan_7b_ptd.sh similarity index 94% rename from tasks/inference/generate_baichuan_7b_ptd.sh rename to examples/baichuan/generate_baichuan_7b_ptd.sh index d0caad80a00..8a76d60f341 100644 --- a/tasks/inference/generate_baichuan_7b_ptd.sh +++ b/examples/baichuan/generate_baichuan_7b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ diff --git a/examples/baichuan2/README.md b/examples/baichuan2/README.md index 820d8676b7d..4b4ebb178b7 100644 --- a/examples/baichuan2/README.md +++ b/examples/baichuan2/README.md @@ -100,7 +100,7 @@ mkdir baichuan2-7B-mt # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python 
tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -118,7 +118,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -186,7 +186,7 @@ Baichuan2-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -首先需要配置baichuan2-7B的推理脚本: tasks/inference/generate_baichuan2_7b_ptd.sh +首先需要配置baichuan2-7B的推理脚本: examples/baichuan2/generate_baichuan2_7b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -200,10 +200,11 @@ TOKENIZER_PATH="your tokenizer directory path" 然后可直接启动generate_baichuan2_7b_ptd.sh ```bash -bash tasks/inference/generate_baichuan2_7b_ptd.sh +bash examples/baichuan2/generate_baichuan2_7b_ptd.sh ``` 推理的示例如下: + ![Inference](../../sources/images/baichuan2/baichuan2_7B_inference.png) ## 评估 @@ -220,7 +221,7 @@ TASK="boolq" ``` ```shell -bash ./tasks/evaluation/evaluate_baichuan2_7B_ptd.sh +bash ./examples/baichuan2/evaluate_baichuan2_7B_ptd.sh ```
@@ -320,7 +321,7 @@ mkdir baichuan2-13B-mt # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -338,7 +339,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -404,7 +405,7 @@ Baichuan2-13B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -首先需要配置baichuan2-13B的推理脚本: tasks/inference/generate_baichuan2_13b_ptd.sh +首先需要配置baichuan2-13B的推理脚本: examples/baichuan2/generate_baichuan2_13b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -418,10 +419,11 @@ TOKENIZER_PATH="your tokenizer directory path" 然后可直接启动generate_baichuan2_13b_ptd.sh ```bash -bash tasks/inference/generate_baichuan2_13b_ptd.sh +bash examples/baichuan2/generate_baichuan2_13b_ptd.sh ``` 推理的示例如下: + ![Inference](../../sources/images/baichuan2/baichuan2_13B_inference.png) ## 评估 @@ -440,7 +442,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan2_13B_ptd.sh +bash ./examples/baichuan2/evaluate_baichuan2_13B_ptd.sh ```
diff --git a/examples/baichuan2/README_en.md b/examples/baichuan2/README_en.md index e8a12cc25c8..d6954cc94a5 100644 --- a/examples/baichuan2/README_en.md +++ b/examples/baichuan2/README_en.md @@ -104,7 +104,7 @@ mkdir baichuan2-7B-mt # modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -121,7 +121,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -190,7 +190,7 @@ The performance of Baichuan2-7B in **Ascend NPU** and **Reference**: ## Inference -Config baichuan2-7B inference script: tasks/inference/generate_baichuan2_7b_ptd.sh +Config baichuan2-7B inference script: examples/baichuan2/generate_baichuan2_7b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -199,9 +199,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch baichuan2-7B inference script: tasks/inference/generate_baichuan2_7b_ptd.sh +Launch baichuan2-7B inference script: examples/baichuan2/generate_baichuan2_7b_ptd.sh ```bash -bash tasks/inference/generate_baichuan2_7b_ptd.sh +bash examples/baichuan2/generate_baichuan2_7b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/baichuan2/baichuan2_7B_inference.png) @@ -220,7 +220,7 @@ TASK="boolq" ``` ```shell -bash ./tasks/evaluation/evaluate_baichuan2_13B_ptd.sh +bash ./examples/baichuan2/evaluate_baichuan2_13B_ptd.sh ```
@@ -324,7 +324,7 @@ mkdir baichuan2-13B-mt # modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -342,7 +342,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -405,7 +405,7 @@ The performance of the Baichuan2-13B in **Ascend NPU** and **Reference**: ## Inference -Config baichuan2-13B inference script: tasks/inference/generate_baichuan2_13b_ptd.sh +Config baichuan2-13B inference script: examples/baichuan2/generate_baichuan2_13b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -414,9 +414,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch baichuan2-13B inference script: tasks/inference/generate_baichuan2_13b_ptd.sh +Launch baichuan2-13B inference script: examples/baichuan2/generate_baichuan2_13b_ptd.sh ```bash -bash tasks/inference/generate_baichuan2_13b_ptd.sh +bash examples/baichuan2/generate_baichuan2_13b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/baichuan2/baichuan2_13B_inference.png) @@ -435,7 +435,7 @@ TASK="boolq" ``` ```shell -bash ./tasks/evaluation/evaluate_baichuan2_13B_ptd.sh +bash ./examples/baichuan2/evaluate_baichuan2_13B_ptd.sh ```
diff --git a/tasks/evaluation/evaluate_baichuan2_13B_ptd.sh b/examples/baichuan2/evaluate_baichuan2_13B_ptd.sh similarity index 95% rename from tasks/evaluation/evaluate_baichuan2_13B_ptd.sh rename to examples/baichuan2/evaluate_baichuan2_13B_ptd.sh index 9708a65f288..7be0e6f3212 100644 --- a/tasks/evaluation/evaluate_baichuan2_13B_ptd.sh +++ b/examples/baichuan2/evaluate_baichuan2_13B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_baichuan2_7B_ptd.sh b/examples/baichuan2/evaluate_baichuan2_7B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_baichuan2_7B_ptd.sh rename to examples/baichuan2/evaluate_baichuan2_7B_ptd.sh index 3cdba8eb2d0..f742d8ac893 100644 --- a/tasks/evaluation/evaluate_baichuan2_7B_ptd.sh +++ b/examples/baichuan2/evaluate_baichuan2_7B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/inference/generate_baichuan2_13b_ptd.sh b/examples/baichuan2/generate_baichuan2_13b_ptd.sh similarity index 94% rename from tasks/inference/generate_baichuan2_13b_ptd.sh rename to examples/baichuan2/generate_baichuan2_13b_ptd.sh index 783edb8164e..76c9655a945 100644 --- a/tasks/inference/generate_baichuan2_13b_ptd.sh +++ b/examples/baichuan2/generate_baichuan2_13b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ diff --git a/tasks/inference/generate_baichuan2_7b_ptd.sh b/examples/baichuan2/generate_baichuan2_7b_ptd.sh similarity index 94% rename from tasks/inference/generate_baichuan2_7b_ptd.sh rename to examples/baichuan2/generate_baichuan2_7b_ptd.sh index f06b11100cc..61927d73eba 100644 --- a/tasks/inference/generate_baichuan2_7b_ptd.sh +++ b/examples/baichuan2/generate_baichuan2_7b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ diff --git a/examples/bloom/README.md b/examples/bloom/README.md index da3a7a7cb47..09319f46fcd 100644 --- a/examples/bloom/README.md +++ b/examples/bloom/README.md @@ -71,7 +71,7 @@ cd .. 
将模型权重文件从 HuggingFace权重 格式转化为 Megatron 权重 ***(该场景一般用于使能开源的HuggingFace模型在Megatron上进行训练)*** ```shell -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader loader_bloom_hf \ --saver saver_megatron \ --target-tensor-parallel-size 8 \ @@ -87,7 +87,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -153,7 +153,7 @@ Bloom-7B ## Bloom-7B推理 -首先配置Bloom-7B 推理脚本: tasks/inference/generate_bloom_ptd_7B.sh +首先配置Bloom-7B 推理脚本: examples/bloom/generate_bloom_ptd_7B.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -166,7 +166,7 @@ TOKENIZER_PATH="your tokenizer path" 然后可直接启动generate_bloom_7b_ptd.sh ```bash -bash tasks/inference/generate_bloom_7b_ptd.sh +bash examples/bloom/generate_bloom_7b_ptd.sh ``` 推理示例如下: @@ -174,7 +174,7 @@ bash tasks/inference/generate_bloom_7b_ptd.sh ## Bloom-7B评测 -配置Bloom-7B 评估脚本: tasks/evaluation/evaluate_bloom_7b_ptd.sh +配置Bloom-7B 评估脚本: examples/bloom/evaluate_bloom_7b_ptd.sh ```bash # ascend-toolkit 路径 @@ -191,7 +191,7 @@ TASK="your task" 启动评估 ```bash -bash tasks/evaluation/evaluate_bloom_7B_ptd.sh +bash examples/bloom/evaluate_bloom_7B_ptd.sh ``` MMLU评测得分 @@ -269,7 +269,7 @@ cd .. 将模型权重文件从 HuggingFace权重 格式转化为 Megatron 权重 ***(该场景一般用于使能开源的HuggingFace模型在Megatron上进行训练)*** ```shell -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader loader_bloom_hf \ --saver saver_megatron \ --target-tensor-parallel-size 8 \ @@ -288,7 +288,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -358,7 +358,7 @@ Bloom-176B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -首先配置Bloom-176B 推理脚本: tasks/inference/generate_bloom_ptd_176B.sh +首先配置Bloom-176B 推理脚本: examples/bloom/generate_bloom_ptd_176B.sh bloom 176b的推理需要5机,因此要用上面的 权重转换脚本重新切分,tp=8,pp=5 ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -372,7 +372,7 @@ TOKENIZER_PATH="your tokenizer path" 然后可直接启动generate_bloom_176b_ptd.sh ```bash -bash tasks/inference/generate_bloom_176b_ptd.sh +bash examples/bloom/generate_bloom_176b_ptd.sh ``` 推理示例如下: @@ -381,7 +381,7 @@ bash tasks/inference/generate_bloom_176b_ptd.sh ## 评估 -配置Bloom-176B 评估脚本: tasks/evaluation/evaluate_bloom_176b_ptd.sh +配置Bloom-176B 评估脚本: examples/bloom/evaluate_bloom_176b_ptd.sh ```bash # ascend-toolkit 路径 @@ -398,7 +398,7 @@ TASK="your task" 启动评估 ```bash -bash tasks/evaluation/evaluate_bloom_176B_ptd.sh +bash examples/bloom/evaluate_bloom_176B_ptd.sh ``` 评测得分 diff --git a/examples/bloom/README_en.md b/examples/bloom/README_en.md index dc4389421a3..18444553a20 100644 --- a/examples/bloom/README_en.md +++ b/examples/bloom/README_en.md @@ -73,7 +73,7 @@ HuggingFace weights --> Megatron weights ***(This scenario is generally used to train open-source HuggingFace models on Megatron)*** ```shell -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader loader_bloom_hf \ --saver 
saver_megatron \ --target-tensor-parallel-size 8 \ @@ -89,7 +89,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -160,7 +160,7 @@ The performance of Bloom-7B in **Ascend NPU** and **Reference**: ## Inference Bloom-7B -Config Bloom-7B inference script: tasks/inference/generate_bloom_7b_ptd.sh +Config Bloom-7B inference script: examples/bloom/generate_bloom_7b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -169,16 +169,16 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model save ckpt path" TOKENIZER_PATH="your tokenizer path" ``` -Launch Bloom-7B inference script: tasks/inference/generate_bloom_7b_ptd.sh +Launch Bloom-7B inference script: examples/bloom/generate_bloom_7b_ptd.sh ```bash -bash tasks/inference/generate_bloom_7b_ptd.sh +bash examples/bloom/generate_bloom_7b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/bloom/bloom7b-generate.png) ## Evaluation Bloom-7B -Config Bloom-7B evaluation script: tasks/evaluation/evaluate_bloom_7B_ptd.sh +Config Bloom-7B evaluation script: examples/bloom/evaluate_bloom_7B_ptd.sh ```bash source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -193,7 +193,7 @@ TASK="your task" Launch Bloom-7B evaluation script: ```bash -bash tasks/evaluation/evaluate_bloom_7B_ptd.sh +bash examples/bloom/evaluate_bloom_7B_ptd.sh ``` Evaluation results @@ -274,7 +274,7 @@ HuggingFace weights --> Megatron weights ***(This scenario is generally used to train open-source HuggingFace models on Megatron)*** ```shell -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader loader_bloom_hf \ --saver saver_megatron \ --target-tensor-parallel-size 8 \ @@ -291,7 +291,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -361,7 +361,7 @@ The performance of Bloom-176B in **Ascend NPU** and **Reference**: ## Inference Bloom 176B -Config Bloom-176B inference script: tasks/inference/generate_bloom_176b_ptd.sh +Config Bloom-176B inference script: examples/bloom/generate_bloom_176b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -370,18 +370,19 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model save ckpt path" TOKENIZER_PATH="your tokenizer path" ``` -Launch Bloom-176B inference script: tasks/inference/generate_bloom_176b_ptd.sh +Launch Bloom-176B inference script: examples/bloom/generate_bloom_176b_ptd.sh + Bloom-176b needs 5 machines to inference, so you need to convert a new model, set tp=8, pp=5 ```bash -bash tasks/inference/generate_bloom_176b_ptd.sh +bash examples/bloom/generate_bloom_176b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/bloom/bloom176b-generate.png) ## Evaluation Bloom 176B 
-Config Bloom-176B evaluation script: tasks/evaluation/evaluate_bloom_176B_ptd.sh +Config Bloom-176B evaluation script: examples/bloom/evaluate_bloom_176B_ptd.sh ```bash source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -396,7 +397,7 @@ TASK="your task" Launch Bloom-176B evaluation script: ```bash -bash tasks/evaluation/evaluate_bloom_176B_ptd.sh +bash examples/bloom/evaluate_bloom_176B_ptd.sh ``` Evaluation results diff --git a/tasks/evaluation/evaluate_bloom_176b_ptd.sh b/examples/bloom/evaluate_bloom_176b_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_bloom_176b_ptd.sh rename to examples/bloom/evaluate_bloom_176b_ptd.sh index dcf3f0f223b..b9f23513902 100644 --- a/tasks/evaluation/evaluate_bloom_176b_ptd.sh +++ b/examples/bloom/evaluate_bloom_176b_ptd.sh @@ -22,7 +22,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 5 \ --num-layers 70 \ diff --git a/tasks/evaluation/evaluate_bloom_7b_ptd.sh b/examples/bloom/evaluate_bloom_7b_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_bloom_7b_ptd.sh rename to examples/bloom/evaluate_bloom_7b_ptd.sh index c6218a0fd3b..8ed1d49947b 100644 --- a/tasks/evaluation/evaluate_bloom_7b_ptd.sh +++ b/examples/bloom/evaluate_bloom_7b_ptd.sh @@ -17,7 +17,7 @@ NODE_RANK=0 NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" # configure generation parameters -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --tensor-model-parallel-size 8 \ diff --git a/tasks/inference/generate_bloom_176b_ptd.sh b/examples/bloom/generate_bloom_176b_ptd.sh similarity index 94% rename from tasks/inference/generate_bloom_176b_ptd.sh rename to examples/bloom/generate_bloom_176b_ptd.sh index 4703a7949b9..145a81bf3c0 100644 --- a/tasks/inference/generate_bloom_176b_ptd.sh +++ b/examples/bloom/generate_bloom_176b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 5 \ --num-layers 70 \ diff --git a/tasks/inference/generate_bloom_7b_ptd.sh b/examples/bloom/generate_bloom_7b_ptd.sh similarity index 92% rename from tasks/inference/generate_bloom_7b_ptd.sh rename to examples/bloom/generate_bloom_7b_ptd.sh index 781d06b0d20..117b81df985 100644 --- a/tasks/inference/generate_bloom_7b_ptd.sh +++ b/examples/bloom/generate_bloom_7b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m 
torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --sequence-parallel \ diff --git a/examples/intern/README.md b/examples/intern/README.md index 8cc71e20fe6..4ce9264037c 100644 --- a/examples/intern/README.md +++ b/examples/intern/README.md @@ -122,7 +122,7 @@ cd .. ***(该场景一般用于使能开源的HuggingFace模型在Megatron上进行训练)*** ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -140,7 +140,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -186,9 +186,9 @@ Internlm-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: #### 推理 推理脚本: -tasks/inference/generate_lnternlm_7b_ptd.sh +examples/intern/generate_internlm_7b_ptd.sh ``` -bash ./tasks/inference/generate_lnternlm_7b_ptd.sh +bash ./examples/intern/generate_internlm_7b_ptd.sh ``` 推理举例: ![Internlm-7b-inference](../../sources/images/intern/intern_7B_inference.png) @@ -197,9 +197,9 @@ bash ./tasks/inference/generate_lnternlm_7b_ptd.sh 使用MMLU数据集评估模型。数据集[下载](https://huggingface.co/datasets/cais/mmlu) 评估脚本: -tasks/evaluation/evaluate_internlm_7B_ptd.sh +examples/intern/evaluate_internlm_7B_ptd.sh ``` -bash tasks/evaluation/evaluate_internlm_7B_ptd.sh +bash examples/intern/evaluate_internlm_7B_ptd.sh ``` InternLM-7B在**Ascend NPU**中的评测表现: diff --git a/examples/intern/README_en.md b/examples/intern/README_en.md index d137d7cb3a3..7664d4ca520 100644 --- a/examples/intern/README_en.md +++ b/examples/intern/README_en.md @@ -122,7 +122,7 @@ In order to adapt to the internlm-7B model, the following script is used to conv ***(This scenario is generally used to train open-source HuggingFace models on Megatron)*** ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -140,7 +140,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -185,9 +185,9 @@ The performance of Internlm-7B in **Ascend NPU** and **Reference**: #### Inference Inference script: -tasks/inference/generate_lnternlm_7b_ptd.sh +examples/intern/generate_internlm_7b_ptd.sh ``` -bash ./tasks/inference/generate_lnternlm_7b_ptd.sh +bash ./examples/intern/generate_internlm_7b_ptd.sh ``` Inference case: @@ -197,9 +197,9 @@ Inference case: Evaluating the model using the MMLU dataset.
dataset [download](https://huggingface.co/datasets/cais/mmlu) Evaluation script: -tasks/evaluation/evaluate_internlm_7B_ptd.sh +examples/intern/evaluate_internlm_7B_ptd.sh ``` -bash tasks/evaluation/evaluate_internlm_7B_ptd.sh +bash examples/intern/evaluate_internlm_7B_ptd.sh ``` The evaluation performance of LLaMA-7B/13B in **Ascend NPU**: diff --git a/tasks/evaluation/evaluate_internlm_7B_ptd.sh b/examples/intern/evaluate_internlm_7B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_internlm_7B_ptd.sh rename to examples/intern/evaluate_internlm_7B_ptd.sh index 23d32949f34..719a1cc7ef3 100644 --- a/tasks/evaluation/evaluate_internlm_7B_ptd.sh +++ b/examples/intern/evaluate_internlm_7B_ptd.sh @@ -22,7 +22,7 @@ NODE_RANK=0 NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" # configure generation parameters -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --add-qkv-bias \ diff --git a/tasks/inference/generate_lnternlm_7b_ptd.sh b/examples/intern/generate_internlm_7b_ptd.sh similarity index 95% rename from tasks/inference/generate_lnternlm_7b_ptd.sh rename to examples/intern/generate_internlm_7b_ptd.sh index c25481bfe04..632f79d9b18 100644 --- a/tasks/inference/generate_lnternlm_7b_ptd.sh +++ b/examples/intern/generate_internlm_7b_ptd.sh @@ -22,7 +22,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --add-qkv-bias \ diff --git a/examples/llama/README.md b/examples/llama/README.md index 7fc40344ef9..7c8a69b5be2 100644 --- a/examples/llama/README.md +++ b/examples/llama/README.md @@ -95,7 +95,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -114,7 +114,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 # 单机8卡 mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -131,7 +131,7 @@ LLaMA-7B cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -146,7 +146,7 @@ LLaMA-13B cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -305,13 +305,13 @@ SAVE_CHECKPOINT_PATH="your model ckpt save path" LLaMA-7B ```shell -bash tasks/finetune/tune_llama_7b_ptd.sh +bash examples/llama/tune_llama_7b_ptd.sh ``` LLaMA-13B 
```shell # 单机8卡 -bash tasks/finetune/tune_llama_13b_ptd.sh +bash examples/llama/tune_llama_13b_ptd.sh ``` ### 性能 @@ -335,7 +335,7 @@ LLaMA-7B/13B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: 我们支持使用 LLaMA-7B 和 LLaMA-13B 进行文本生成的推理。 推理与预训练不同,比如我们需要加载预训练权重和输出样本的长度: -配置LLaMA-7B推理脚本`tasks/inference/generate_llama_7b_ptd.sh`和LLaMA-13B推理脚本`tasks/inference/generate_llama_13b_ptd.sh`。 +配置LLaMA-7B推理脚本`examples/llama/generate_llama_7b_ptd.sh`和LLaMA-13B推理脚本`examples/llama/generate_llama_13b_ptd.sh`。 ```shell # 修改模型权重路径和分词器路径 @@ -345,27 +345,27 @@ TOKENIZER_PATH= LLaMA-7B: ```shell -bash ./tasks/inference/generate_llama_7b_ptd.sh +bash ./examples/llama/generate_llama_7b_ptd.sh ``` LLaMA-13B: ```shell -bash ./tasks/inference/generate_llama_13b_ptd.sh +bash ./examples/llama/generate_llama_13b_ptd.sh ``` -配置LLaMA-7B lora推理脚本`tasks/inference/generate_llama_7b_lora_ptd.sh`和LLaMA-13B lora推理脚本`tasks/inference/generate_llama_13b_lora_ptd.sh`。 +配置LLaMA-7B lora推理脚本`examples/llama/generate_llama_7b_lora_ptd.sh`和LLaMA-13B lora推理脚本`examples/llama/generate_llama_13b_lora_ptd.sh`。 ```bash # 修改lora权重路径 CHECKPOINT_LORA="your lora model directory path" ``` LLaMA-7B: ```shell -bash ./tasks/inference/generate_llama_7b_lora_ptd.sh +bash ./examples/llama/generate_llama_7b_lora_ptd.sh ``` LLaMA-13B: ```shell -bash ./tasks/inference/generate_llama_13b_lora_ptd.sh +bash ./examples/llama/generate_llama_13b_lora_ptd.sh ``` 部分推理样本如下: @@ -384,7 +384,7 @@ LLaMA-13B: 我们使用 Boolq benchmark 来评估我们的模型。Benchmark下载[此处](https://huggingface.co/datasets/boolq)。 -配置LLaMA-7B评估脚本 `tasks/evaluation/evaluate_llama_7B_ptd.sh` 和 LLaMA-13B评估脚本 `tasks/evaluation/evaluate_llama_13B_ptd.sh`: +配置LLaMA-7B评估脚本 `examples/llama/evaluate_llama_7B_ptd.sh` 和 LLaMA-13B评估脚本 `examples/llama/evaluate_llama_13B_ptd.sh`: 修改权重路径, 词表路径和数据集任务路径: ```shell @@ -401,8 +401,8 @@ TASK="boolq" 开始评估: ```shell -bash tasks/evaluation/evaluate_llama_7B_ptd.sh -bash tasks/evaluation/evaluate_llama_13B_ptd.sh +bash examples/llama/evaluate_llama_7B_ptd.sh +bash examples/llama/evaluate_llama_13B_ptd.sh ``` LLaMA-7B/13B在**Ascend NPU**中的评测表现: @@ -511,7 +511,7 @@ python $SCRIPT_PATH \ llama-65B ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -528,7 +528,7 @@ llama-33B cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -543,7 +543,7 @@ llama-65B cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -719,13 +719,13 @@ SAVE_CHECKPOINT_PATH="your model ckpt save path" LLaMA-33B ```shell -bash tasks/finetune/tune_llama_33b_ptd.sh +bash examples/llama/tune_llama_33b_ptd.sh ``` LLaMA-65B ```shell # 双机16卡 -bash tasks/finetune/tune_llama_65b_ptd.sh +bash examples/llama/tune_llama_65b_ptd.sh ``` ### 性能 @@ -748,9 +748,9 @@ LLaMA-33B/65B在 **昇腾芯片** 和 **参考芯片** 上的性能对比: 我们支持使用 LLaMA-33B 和 LLaMA-65B 进行文本生成的推理。 推理与预训练不同,比如我们需要加载预训练权重和输出样本的长度: -配置LLaMA-33B推理脚本`tasks/inference/generate_llama_33b_ptd.sh`。 +配置LLaMA-33B推理脚本`examples/llama/generate_llama_33b_ptd.sh`。 
-配置LLaMA-65B推理脚本`tasks/inference/generate_llama_65b_ptd.sh`。 +配置LLaMA-65B推理脚本`examples/llama/generate_llama_65b_ptd.sh`。 ```shell # 修改模型权重路径和分词器路径 @@ -760,16 +760,16 @@ TOKENIZER_PATH= LLaMA-33B: ```shell -bash ./tasks/inference/generate_llama_33b_ptd.sh +bash ./examples/llama/generate_llama_33b_ptd.sh ``` LLaMA-65B: ```shell -bash ./tasks/inference/generate_llama_65b_ptd.sh +bash ./examples/llama/generate_llama_65b_ptd.sh ``` -配置LLaMA-33B lora推理脚本`tasks/inference/generate_llama_33B_lora_ptd.sh`。 +配置LLaMA-33B lora推理脚本`examples/llama/generate_llama_33B_lora_ptd.sh`。 -配置LLaMA-65B lora推理脚本`tasks/inference/generate_llama_65B_lora_ptd.sh`。 +配置LLaMA-65B lora推理脚本`examples/llama/generate_llama_65B_lora_ptd.sh`。 ```bash # 修改lora权重路径 @@ -777,12 +777,12 @@ CHECKPOINT_LORA="your lora model directory path" ``` LLaMA-33B: ```shell -bash ./tasks/inference/generate_llama_33b_lora_ptd.sh +bash ./examples/llama/generate_llama_33b_lora_ptd.sh ``` LLaMA-65B: ```shell -bash ./tasks/inference/generate_llama_65b_lora_ptd.sh +bash ./examples/llama/generate_llama_65b_lora_ptd.sh ``` 部分推理样本如下: @@ -799,9 +799,9 @@ LLaMA-65B: 我们使用 Boolq benchmark 来评估我们的模型。Benchmark下载[此处](https://huggingface.co/datasets/boolq)。 -配置LLaMA-33B评估脚本:tasks/evaluation/evaluate_llama_33B_ptd.sh +配置LLaMA-33B评估脚本:examples/llama/evaluate_llama_33B_ptd.sh -配置LLaMA-65B评估脚本:tasks/evaluation/evaluate_llama_65B_ptd.sh +配置LLaMA-65B评估脚本:examples/llama/evaluate_llama_65B_ptd.sh 修改权重路径, 词表路径和数据集任务路径: ```shell @@ -819,9 +819,9 @@ TASK="boolq" ```shell # llama-33B评估 -bash tasks/evaluation/evaluate_llama_33B_ptd.sh +bash examples/llama/evaluate_llama_33B_ptd.sh # llama-65B评估 -bash tasks/evaluation/evaluate_llama_65B_ptd.sh +bash examples/llama/evaluate_llama_65B_ptd.sh ``` LLaMA-33B和LLaMA-65B在**Ascend NPU**中的评测表现: diff --git a/examples/llama/README_en.md b/examples/llama/README_en.md index 765cdccbfde..4ad65869c5d 100644 --- a/examples/llama/README_en.md +++ b/examples/llama/README_en.md @@ -95,7 +95,7 @@ LLaMA-7B ```shell source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -109,7 +109,7 @@ LLaMA-13B ```shell # Single machine with 8p mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -127,7 +127,7 @@ LLaMA-7B cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -142,7 +142,7 @@ LLaMA-13B cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -300,13 +300,13 @@ Add the fine-tuning parameter `--finetune` so that fine-tuning starts from the f LLaMA-7B ```shell -bash tasks/finetune/tune_llama_7b_ptd.sh +bash examples/llama/tune_llama_7b_ptd.sh ``` LLaMA-13B ```shell # 8p -bash tasks/finetune/tune_llama_13b_ptd.sh +bash examples/llama/tune_llama_13b_ptd.sh ``` @@ -330,7 +330,7 @@ The performance of LLaMA-7B/13B in **Ascend NPU** 
and **Reference**: We support ModelLink Inference for text generation with LLaMA-7B and LLaMA-13B. Inference different from pre-training, such as we need to Load pre-training checkpoint and the length of the output samples: -Config LLaMA-7B inference script `tasks/inference/generate_llama_7b_ptd.sh` and LLaMA-13B inference script `tasks/inference/generate_llama_13b_ptd.sh`. +Config LLaMA-7B inference script `examples/llama/generate_llama_7b_ptd.sh` and LLaMA-13B inference script `examples/llama/generate_llama_13b_ptd.sh`. ```shell # modify the model weight path and tokenizer path @@ -340,12 +340,12 @@ TOKENIZER_PATH= LLaMA-7B: ```shell -bash ./tasks/inference/generate_llama_7b_ptd.sh +bash ./examples/llama/generate_llama_7b_ptd.sh ``` LLaMA-13B: ```shell -bash ./tasks/inference/generate_llama_13b_ptd.sh +bash ./examples/llama/generate_llama_13b_ptd.sh ``` Some inference samples are as follows: @@ -363,7 +363,7 @@ LLaMA-13B: We use boolq benchmark to evaluate our model. Benchmark Download [here](https://huggingface.co/datasets/boolq). -Config LLaMA-7B evaluation script `tasks/evaluation/evaluate_llama_7B_ptd.sh` and LLaMA-13B evaluation script `tasks/evaluation/evaluate_llama_13B_ptd.sh`: +Config LLaMA-7B evaluation script `examples/llama/evaluate_llama_7B_ptd.sh` and LLaMA-13B evaluation script `examples/llama/evaluate_llama_13B_ptd.sh`: Modify checkpoint path, vocab path, dataset path and task: @@ -386,8 +386,8 @@ Change the max new tokens: Start evaluation: ```shell -bash tasks/evaluation/evaluate_llama_7B_ptd.sh -bash tasks/evaluation/evaluate_llama_13B_ptd.sh +bash examples/llama/evaluate_llama_7B_ptd.sh +bash examples/llama/evaluate_llama_13B_ptd.sh ``` The evaluation performance of LLaMA-7B/13B in **Ascend NPU**: @@ -486,7 +486,7 @@ llama-33B ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 4 \ @@ -499,7 +499,7 @@ python tools/checkpoint/util.py --model-type GPT \ llama-65B ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -516,7 +516,7 @@ LLaMA-33B cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -531,7 +531,7 @@ LLaMA-65B cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -696,12 +696,12 @@ Add the fine-tuning parameter `--finetune` so that fine-tuning starts from the f Launch llama-33B pre-training script : ModelLink/examples/llama/tune_llama_33B_ptd_32p.sh ```bash -bash tasks/finetune/tune_llama_33B_ptd_32p.sh +bash examples/llama/tune_llama_33B_ptd_32p.sh ``` Launch llama-65B pre-training script : ModelLink/examples/llama/tune_llama_65b_ptd.sh ```bash -bash tasks/finetune/tune_llama_65b_ptd.sh +bash examples/llama/tune_llama_65b_ptd.sh ``` Config llama-33B/65B pre-training script for multinode (Launch llama-65B pre-training script on each machine): @@ -732,9 +732,9 @@ The 
performance of the NPUs in **Ascend** and Reference: We support ModelLink Inference for text generation with LLaMA-33B and LLaMA-65B. Inference different from pre-training, such as we need to Load pre-training checkpoint and the length of the output samples: -Config LLaMA-33B inference script `tasks/inference/generate_llama_33b_ptd.sh`. +Config LLaMA-33B inference script `examples/llama/generate_llama_33b_ptd.sh`. -Config LLaMA-65B inference script `tasks/inference/generate_llama_65b_ptd.sh`. +Config LLaMA-65B inference script `examples/llama/generate_llama_65b_ptd.sh`. ```shell # modify the model weight path and tokenizer path @@ -744,11 +744,11 @@ TOKENIZER_PATH= LLaMA-33B: ```shell -bash ./tasks/inference/generate_llama_33b_ptd.sh +bash ./examples/llama/generate_llama_33b_ptd.sh ``` LLaMA-65B: ```shell -bash ./tasks/inference/generate_llama_65b_ptd.sh +bash ./examples/llama/generate_llama_65b_ptd.sh ``` Some inference samples are as follows: @@ -766,9 +766,9 @@ LLaMA-65B: We use Boolq benchmark to evaluate our model. Benchmark Download [here](https://huggingface.co/datasets/boolq). -Config LLaMA-33B evaluation script: tasks/evaluation/evaluate_llama_33B_ptd.sh +Config LLaMA-33B evaluation script: examples/llama/evaluate_llama_33B_ptd.sh -Config LLaMA-65B evaluation script: tasks/evaluation/evaluate_llama_65B_ptd.sh +Config LLaMA-65B evaluation script: examples/llama/evaluate_llama_65B_ptd.sh Modify checkpoint path, vocab path, dataset path and task: @@ -786,9 +786,9 @@ Change the max new tokens: ```shell # start evaluation # evaluate llama-33B -bash tasks/evaluation/evaluate_llama_33B_ptd.sh +bash examples/llama/evaluate_llama_33B_ptd.sh # evaluate llama-65B -bash tasks/evaluation/evaluate_llama_65B_ptd.sh +bash examples/llama/evaluate_llama_65B_ptd.sh ``` The evaluation performance of LLaMA-7B/13B in **Ascend NPU**: diff --git a/tasks/evaluation/evaluate_llama_13B_ptd.sh b/examples/llama/evaluate_llama_13B_ptd.sh similarity index 91% rename from tasks/evaluation/evaluate_llama_13B_ptd.sh rename to examples/llama/evaluate_llama_13B_ptd.sh index 4807adb3463..da5f11e8ca7 100644 --- a/tasks/evaluation/evaluate_llama_13B_ptd.sh +++ b/examples/llama/evaluate_llama_13B_ptd.sh @@ -22,7 +22,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/evaluation/evaluate_llama_33B_ptd.sh b/examples/llama/evaluate_llama_33B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_llama_33B_ptd.sh rename to examples/llama/evaluate_llama_33B_ptd.sh index 80511ff556d..e07bbbe9cdb 100644 --- a/tasks/evaluation/evaluate_llama_33B_ptd.sh +++ b/examples/llama/evaluate_llama_33B_ptd.sh @@ -22,7 +22,7 @@ TOKENIZER_PATH="Your tokenizer path" DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. 
-python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/evaluation/evaluate_llama_65B_ptd.sh b/examples/llama/evaluate_llama_65B_ptd.sh similarity index 91% rename from tasks/evaluation/evaluate_llama_65B_ptd.sh rename to examples/llama/evaluate_llama_65B_ptd.sh index 150209df85c..8ed13cc0aa4 100644 --- a/tasks/evaluation/evaluate_llama_65B_ptd.sh +++ b/examples/llama/evaluate_llama_65B_ptd.sh @@ -20,7 +20,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/evaluation/evaluate_llama_7B_ptd.sh b/examples/llama/evaluate_llama_7B_ptd.sh similarity index 91% rename from tasks/evaluation/evaluate_llama_7B_ptd.sh rename to examples/llama/evaluate_llama_7B_ptd.sh index 4b9148bcd14..76dfcd3935e 100644 --- a/tasks/evaluation/evaluate_llama_7B_ptd.sh +++ b/examples/llama/evaluate_llama_7B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/inference/generate_llama_13b_lora_ptd.sh b/examples/llama/generate_llama_13b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_13b_lora_ptd.sh rename to examples/llama/generate_llama_13b_lora_ptd.sh index c82a0daf88d..93d1b2fd69d 100644 --- a/tasks/inference/generate_llama_13b_lora_ptd.sh +++ b/examples/llama/generate_llama_13b_lora_ptd.sh @@ -20,7 +20,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 1 \ --pipeline-model-parallel-size 8 \ --num-layers 40 \ @@ -53,4 +53,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_13b_lora.log diff --git a/tasks/inference/generate_llama_13b_ptd.sh b/examples/llama/generate_llama_13b_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_13b_ptd.sh rename to examples/llama/generate_llama_13b_ptd.sh index 0b143e4d64f..47c6bd3f579 100644 --- a/tasks/inference/generate_llama_13b_ptd.sh +++ b/examples/llama/generate_llama_13b_ptd.sh @@ -19,7 +19,7 @@ NPUS_PER_NODE=4 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 
2 \ --pipeline-model-parallel-size 2 \ --num-layers 40 \ diff --git a/tasks/inference/generate_llama_33b_lora_ptd.sh b/examples/llama/generate_llama_33b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_33b_lora_ptd.sh rename to examples/llama/generate_llama_33b_lora_ptd.sh index 46b00020d0f..bfcb62c18e2 100644 --- a/tasks/inference/generate_llama_33b_lora_ptd.sh +++ b/examples/llama/generate_llama_33b_lora_ptd.sh @@ -26,7 +26,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_alpaca.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 4 \ --pipeline-model-parallel-size 2 \ --num-layers 60 \ @@ -59,4 +59,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-r 16 \ --lora-alpha 32 \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_33b_lora.log diff --git a/tasks/inference/generate_llama_33b_ptd.sh b/examples/llama/generate_llama_33b_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_33b_ptd.sh rename to examples/llama/generate_llama_33b_ptd.sh index 42df9a98240..36dc704251d 100644 --- a/tasks/inference/generate_llama_33b_ptd.sh +++ b/examples/llama/generate_llama_33b_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 4 \ --pipeline-model-parallel-size 2 \ --num-layers 60 \ diff --git a/tasks/inference/generate_llama_65b_lora_ptd.sh b/examples/llama/generate_llama_65b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_65b_lora_ptd.sh rename to examples/llama/generate_llama_65b_lora_ptd.sh index 4cf0d723060..b61efd8b1ab 100644 --- a/tasks/inference/generate_llama_65b_lora_ptd.sh +++ b/examples/llama/generate_llama_65b_lora_ptd.sh @@ -20,7 +20,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 2 \ --num-layers 80 \ @@ -53,4 +53,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_65b_lora.log diff --git a/tasks/inference/generate_llama_65b_ptd.sh b/examples/llama/generate_llama_65b_ptd.sh similarity index 93% rename from tasks/inference/generate_llama_65b_ptd.sh rename to examples/llama/generate_llama_65b_ptd.sh index 1a1ec0c3ca6..423a7a400c8 100644 --- a/tasks/inference/generate_llama_65b_ptd.sh +++ b/examples/llama/generate_llama_65b_ptd.sh @@ -19,7 +19,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr 
$MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 80 \ @@ -48,4 +48,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --no-gradient-accumulation-fusion \ --exit-on-missing-checkpoint \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_65b.log diff --git a/tasks/inference/generate_llama_7b_lora_ptd.sh b/examples/llama/generate_llama_7b_lora_ptd.sh similarity index 92% rename from tasks/inference/generate_llama_7b_lora_ptd.sh rename to examples/llama/generate_llama_7b_lora_ptd.sh index 66b8f52c73b..3fde0684402 100644 --- a/tasks/inference/generate_llama_7b_lora_ptd.sh +++ b/examples/llama/generate_llama_7b_lora_ptd.sh @@ -20,7 +20,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 1 \ --pipeline-model-parallel-size 8 \ --num-layers 32 \ @@ -53,4 +53,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_7b_lora.log \ No newline at end of file diff --git a/tasks/inference/generate_llama_7b_ptd.sh b/examples/llama/generate_llama_7b_ptd.sh similarity index 92% rename from tasks/inference/generate_llama_7b_ptd.sh rename to examples/llama/generate_llama_7b_ptd.sh index ff91d88d959..96a52d90fd7 100644 --- a/tasks/inference/generate_llama_7b_ptd.sh +++ b/examples/llama/generate_llama_7b_ptd.sh @@ -19,7 +19,7 @@ NPUS_PER_NODE=4 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 2 \ --pipeline-model-parallel-size 2 \ --num-layers 32 \ diff --git a/tasks/finetune/tune_llama_13b_ptd.sh b/examples/llama/tune_llama_13b_ptd.sh similarity index 100% rename from tasks/finetune/tune_llama_13b_ptd.sh rename to examples/llama/tune_llama_13b_ptd.sh diff --git a/tasks/finetune/tune_llama_33b_ptd.sh b/examples/llama/tune_llama_33b_ptd.sh similarity index 96% rename from tasks/finetune/tune_llama_33b_ptd.sh rename to examples/llama/tune_llama_33b_ptd.sh index 6f8afbd988c..daf0d758e83 100644 --- a/tasks/finetune/tune_llama_33b_ptd.sh +++ b/examples/llama/tune_llama_33b_ptd.sh @@ -47,10 +47,10 @@ GPT_ARGS=" --seq-length 2048 \ --max-position-embeddings 2048 \ --micro-batch-size 2 \ - --global-batch-size 16 \ + --global-batch-size 128 \ --make-vocab-size-divisible-by 1 \ --lr 1.5e-4 \ - --train-iters 200 \ + --train-iters 2000 \ --lr-decay-style cosine \ --untie-embeddings-and-output-weights \ --disable-bias-linear \ diff --git a/tasks/finetune/tune_llama_65b_ptd.sh b/examples/llama/tune_llama_65b_ptd.sh similarity index 100% rename from tasks/finetune/tune_llama_65b_ptd.sh rename to
examples/llama/tune_llama_65b_ptd.sh diff --git a/tasks/finetune/tune_llama_7b_ptd.sh b/examples/llama/tune_llama_7b_ptd.sh similarity index 100% rename from tasks/finetune/tune_llama_7b_ptd.sh rename to examples/llama/tune_llama_7b_ptd.sh diff --git a/examples/llama2/README.md b/examples/llama2/README.md index 62c323ab556..7b885bb65a0 100755 --- a/examples/llama2/README.md +++ b/examples/llama2/README.md @@ -108,7 +108,7 @@ LLAMA2-7B 训练的硬件配置: source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -125,7 +125,7 @@ LLAMA2-7B 训练的硬件配置: cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -232,10 +232,10 @@ python tools/checkpoint/util.py --model-type GPT \ --lora-modules-to-save word_embeddings output_layer \ ``` - 启动Lora微调脚本: tasks/finetune/tune_llama2_7b_ptd.sh + 启动Lora微调脚本: examples/llama2/tune_llama2_7b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_7b_ptd.sh + bash examples/llama2/tune_llama2_7b_ptd.sh ``` ### 性能 @@ -252,7 +252,7 @@ LLaMA2-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理-7B -配置llama2-7B 推理脚本: tasks/inference/generate_llama2_7b_ptd.sh +配置llama2-7B 推理脚本: examples/llama2/generate_llama2_7b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -264,7 +264,7 @@ TOKENIZER_PATH="your tokenizer directory path" TOKENIZER_MODEL="your tokenizer.model file path" ``` -配置 LLaMA2-7B lora推理脚本: tasks/inference/generate_llama2_7b_lora_ptd.sh +配置 LLaMA2-7B lora推理脚本: examples/llama2/generate_llama2_7b_lora_ptd.sh ```bash # 修改lora权重路径 @@ -274,12 +274,12 @@ CHECKPOINT_LORA="your lora model directory path" 启动llama2-7B 推理脚本 ```bash -bash tasks/inference/generate_llama2_7b_ptd.sh +bash examples/llama2/generate_llama2_7b_ptd.sh ``` 启动llama2-7B lora推理脚本 ```bash -bash tasks/inference/generate_llama2_7b_lora_ptd.sh +bash examples/llama2/generate_llama2_7b_lora_ptd.sh ``` 推理的示例如下: @@ -288,7 +288,7 @@ bash tasks/inference/generate_llama2_7b_lora_ptd.sh ## 评估-7B 使用 MMLU数据集评估模型. 数据集下载路径 [这里](https://huggingface.co/datasets/cais/mmlu). 
-配置llama2-7B 评估脚本: tasks/evaluation/evaluate_llama2_7B_ptd.sh +配置llama2-7B 评估脚本: examples/llama2/evaluate_llama2_7B_ptd.sh ```bash # ascend-toolkit 路径 @@ -305,7 +305,7 @@ TASK="mmlu" 启动评估 ```bash -bash tasks/evaluation/evaluate_llama2_7B_ptd.sh +bash examples/llama2/evaluate_llama2_7B_ptd.sh ``` 评估结果如下 ```text @@ -435,7 +435,7 @@ LLaMA2-13B 训练的硬件配置: source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -450,7 +450,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -557,10 +557,10 @@ python tools/checkpoint/util.py --model-type GPT \ --lora-modules-to-save word_embeddings output_layer \ ``` - 启动Lora微调脚本: tasks/finetune/tune_llama2_13b_ptd.sh + 启动Lora微调脚本: examples/llama2/tune_llama2_13b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_13b_ptd.sh + bash examples/llama2/tune_llama2_13b_ptd.sh ``` ### 性能 @@ -581,14 +581,14 @@ LLaMA2-13B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: 我们在Llama2 13B中支持推理来生成文本。 推理不同于预训练,比如我们需要加载预训练检查点和输出样本的长度: -配置 LLaMA2-13B 推理脚本: tasks/inference/generate_llama2_13b_ptd.sh +配置 LLaMA2-13B 推理脚本: examples/llama2/generate_llama2_13b_ptd.sh ```shell # 修改模型权重路径以及词表路径 CHECKPOINT=./llama2-13b-tp8-pp1/ TOKENIZER_PATH=./llama2-13b-hf/ ``` -配置 LLaMA2-13B lora推理脚本: tasks/inference/generate_llama2_13b_lora_ptd.sh +配置 LLaMA2-13B lora推理脚本: examples/llama2/generate_llama2_13b_lora_ptd.sh ```bash # 修改lora权重路径 @@ -596,11 +596,11 @@ CHECKPOINT_LORA="your lora model directory path" ``` 启动推理脚本 ```shell -bash ./tasks/inference/generate_llama2_13b_ptd.sh +bash ./examples/llama2/generate_llama2_13b_ptd.sh ``` 启动lora推理脚本 ```shell -bash ./tasks/inference/generate_llama2_13b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_13b_lora_ptd.sh ``` 推理结果示例如下: ![llama2-13B-generate.png](../../sources/images/llama2/llama2-13B-generate.png) @@ -617,7 +617,7 @@ bash ./tasks/inference/generate_llama2_13b_lora_ptd.sh ``` ```shell -bash tasks/evaluation/evaluate_llama2_13B_ptd.sh +bash examples/llama2/evaluate_llama2_13B_ptd.sh ```
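As the relocated scripts above show, inference and evaluation now go through the unified entry points `inference.py` and `evaluation.py` at the repository root. A minimal sketch of launching `evaluation.py` directly is given below; the distributed settings, data path and task name are illustrative placeholders, and the model, checkpoint and tokenizer arguments are omitted and should be copied from the matching `examples/llama2/evaluate_*.sh` script.

```shell
# Illustrative sketch only -- values below are placeholders, not part of the shipped scripts.
DISTRIBUTED_ARGS="--nproc_per_node 8 --nnodes 1 --node_rank 0 --master_addr localhost --master_port 6001"
DATA_PATH="./boolq/data/test/"   # evaluation data directory
TASK="boolq"                     # task name; max_new_tokens depends on the task (see readme)
# Model, checkpoint and tokenizer arguments are omitted here; copy them from
# examples/llama2/evaluate_llama2_13B_ptd.sh for the model being evaluated.
python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \
    --task-data-path $DATA_PATH \
    --task $TASK \
    --seq-length 4096
```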
@@ -761,7 +761,7 @@ pip install -r requirements.txt source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 - python tools/checkpoint/util.py \ + python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -797,7 +797,7 @@ pip install -r requirements.txt cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -812,7 +812,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -957,16 +957,16 @@ python tools/checkpoint/util.py --model-type GPT \ --lora-modules-to-save word_embeddings output_layer \ ``` - 启动llama2-34B Lora微调脚本: tasks/finetune/tune_llama2_34b_ptd.sh + 启动llama2-34B Lora微调脚本: examples/llama2/tune_llama2_34b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_34b_ptd.sh + bash examples/llama2/tune_llama2_34b_ptd.sh ``` - 启动llama2-70B Lora微调脚本: tasks/finetune/tune_llama2_70b_ptd.sh + 启动llama2-70B Lora微调脚本: examples/llama2/tune_llama2_70b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_70b_ptd.sh + bash examples/llama2/tune_llama2_70b_ptd.sh ``` ### 性能-2 @@ -990,9 +990,9 @@ LLaMA2-34B/70B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比 配置推理脚本 -LLaMA2-34B:`tasks/inference/generate_llama2_34B_ptd.sh`。 +LLaMA2-34B:`examples/llama2/generate_llama2_34B_ptd.sh`。 -LLaMA2-70B:`tasks/inference/generate_llama2_70b_ptd.sh`。 +LLaMA2-70B:`examples/llama2/generate_llama2_70b_ptd.sh`。 ```shell # 修改模型权重路径和分词器路径 @@ -1002,9 +1002,9 @@ TOKENIZER_PATH= 配置lora推理脚本 -LLaMA2-34B:`tasks/inference/generate_llama2_34b_lora_ptd.sh`。 +LLaMA2-34B:`examples/llama2/generate_llama2_34b_lora_ptd.sh`。 -LLaMA2-70B:`tasks/inference/generate_llama2_70b_lora_ptd.sh`。 +LLaMA2-70B:`examples/llama2/generate_llama2_70b_lora_ptd.sh`。 ```bash # 修改lora权重路径 @@ -1013,19 +1013,19 @@ CHECKPOINT_LORA="your lora model directory path" LLaMA2-34B启动推理: ```shell -bash ./tasks/inference/generate_llama2_34B_ptd.sh +bash ./examples/llama2/generate_llama2_34B_ptd.sh ``` LLaMA2-34B启动lora推理: ```shell -bash ./tasks/inference/generate_llama2_34b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_34b_lora_ptd.sh ``` LLaMA2-70B启动推理: ```shell -bash ./tasks/inference/generate_llama2_70b_ptd.sh +bash ./examples/llama2/generate_llama2_70b_ptd.sh ``` LLaMA2-70B启动lora推理: ```shell -bash ./tasks/inference/generate_llama2_70b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_70b_lora_ptd.sh ``` LLaMA2-34B推理样例: @@ -1042,9 +1042,9 @@ BoolQ数据集评估样例. 数据集[here](https://huggingface.co/datasets/bool 配置评估脚本: -LLaMA2-34B:`tasks/evaluation/evaluate_llama2_34B_ptd.sh`. +LLaMA2-34B:`examples/llama2/evaluate_llama2_34B_ptd.sh`. -LLaMA2-70B:`tasks/evaluation/evaluate_llama2_70B_ptd.sh`. +LLaMA2-70B:`examples/llama2/evaluate_llama2_70B_ptd.sh`. 
```shell # 修改模型权重路径和分词器路径 CHECKPOINT= TOKENIZER_PATH= LLaMA2-34B评估: ```shell -bash tasks/evaluation/evaluate_llama2_34B_ptd.sh +bash examples/llama2/evaluate_llama2_34B_ptd.sh ``` LLaMA2-70B评估: ```shell -bash tasks/evaluation/evaluate_llama2_70B_ptd.sh +bash examples/llama2/evaluate_llama2_70B_ptd.sh ``` BoolQ 数据集评估结果: diff --git a/examples/llama2/README_en.md b/examples/llama2/README_en.md index de54cbda7d3..16ff99950f4 100644 --- a/examples/llama2/README_en.md +++ b/examples/llama2/README_en.md @@ -115,7 +115,7 @@ Here's a hardware summary of pre-training LLAMA2-7B: source /usr/local/Ascend/ascend-toolkit/set_env.sh # convert to ptd weights - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -130,7 +130,7 @@ Here's a hardware summary of pre-training LLAMA2-7B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -233,10 +233,10 @@ Here's a hardware summary of pre-training LLAMA2-7B: --lora-load ${LORA_CHECKPOINT} \ ``` - Launch LLAMA2-7B lora fine tune script: tasks/finetune/tune_llama2_7b_ptd.sh + Launch LLAMA2-7B lora fine tune script: examples/llama2/tune_llama2_7b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_7b_ptd.sh + bash examples/llama2/tune_llama2_7b_ptd.sh ``` ### Performance @@ -253,7 +253,7 @@ The performance of LLaMA2-7B in **Ascend NPU** and **Reference**: ## Inference-7B -Config llama2-7B inference script: tasks/inference/generate_llama2_7b_ptd.sh +Config llama2-7B inference script: examples/llama2/generate_llama2_7b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -262,16 +262,16 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh TOKENIZER_PATH=./llama2-7b-hf/ #tokenizer path CHECKPOINT=./llama2-7b-tp8pp1 #model path ``` -Config llama2-7B lora inference script: tasks/inference/generate_llama2_7b_lora_ptd.sh +Config llama2-7B lora inference script: examples/llama2/generate_llama2_7b_lora_ptd.sh ```bash # modify lora model path CHECKPOINT_LORA="your lora model directory path" ``` -Launch llama2-7B inference script: tasks/inference/generate_llama2_7b_ptd.sh +Launch llama2-7B inference script: examples/llama2/generate_llama2_7b_ptd.sh ```bash bash examples/llama2/generate_llama2_7b_ptd.sh ``` -Launch llama2-7B lora inference script: tasks/inference/generate_llama2_7b_lora_ptd.sh +Launch llama2-7B lora inference script: examples/llama2/generate_llama2_7b_lora_ptd.sh ```bash bash examples/llama2/generate_llama2_7b_lora_ptd.sh ``` @@ -280,7 +280,7 @@ Some inference samples are as follows: ## Evaluation-7B We use MMLU benchmark to evaluate our model. Benchmark Download [here](https://huggingface.co/datasets/cais/mmlu).
-Config llama2-7B evaluation script: tasks/evaluation/evaluate_llama2_7B_ptd.sh +Config llama2-7B evaluation script: examples/llama2/evaluate_llama2_7B_ptd.sh ```bash source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -295,7 +295,7 @@ TASK="mmlu" Launch llama2-7B evaluation script: ```bash -bash tasks/evaluation/evaluate_llama2_7B_ptd.sh +bash examples/llama2/evaluate_llama2_7B_ptd.sh ``` Evaluation results @@ -425,7 +425,7 @@ Here's a hardware summary of pre-training LLaMA2-13B: source /usr/local/Ascend/ascend-toolkit/set_env.sh # convert weights - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -441,7 +441,7 @@ Here's a hardware summary of pre-training LLaMA2-13B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -539,10 +539,10 @@ Here's a hardware summary of pre-training LLaMA2-13B: ``` - Launch LLAMA2-13B lora fine tune script: tasks/finetune/tune_llama2_13b_ptd.sh + Launch LLAMA2-13B lora fine tune script: examples/llama2/tune_llama2_13b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_13b_ptd.sh + bash examples/llama2/tune_llama2_13b_ptd.sh ``` ### Performance @@ -571,7 +571,7 @@ CHECKPOINT=./llama2-13b-tp8-pp1/ TOKENIZER_PATH=./llama2-13b-hf/ ``` -Config Llama2-13B lora inference script: tasks/inference/generate_llama2_13b_lora_ptd.sh +Config Llama2-13B lora inference script: examples/llama2/generate_llama2_13b_lora_ptd.sh ```bash # modify lora model directory path @@ -580,11 +580,11 @@ CHECKPOINT_LORA="your lora model directory path" Launch Llama2-13B inference script. ```shell -bash ./tasks/inference/generate_llama2_13b_ptd.sh +bash examples/llama2/generate_llama2_13b_ptd.sh ``` Launch Llama2-13B lora inference script. ```shell -bash ./tasks/inference/generate_llama2_13b_lora_ptd.sh +bash examples/llama2/generate_llama2_13b_lora_ptd.sh ``` Some inference samples are as follows: ![llama2-13B-generate.png](../../sources/images/llama2/llama2-13B-generate.png) @@ -601,7 +601,7 @@ We use boolq benchmark to evaluate our model. Benchmark Download [here](https:// ``` ```shell -bash tasks/evaluation/evaluate_llama2_13B_ptd.sh +bash examples/llama2/evaluate_llama2_13B_ptd.sh ```
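The lora generation scripts in this patch also gain an `--inference-prompt-type 'alpaca'` flag. A minimal sketch of such a lora inference launch is shown below; all values are placeholders, base-model arguments (layer count, checkpoint and tokenizer paths) are omitted, and the authoritative argument list is the relocated `examples/llama2/generate_llama2_13b_lora_ptd.sh` script.

```shell
# Illustrative sketch only -- placeholder values, abridged argument list.
DISTRIBUTED_ARGS="--nproc_per_node 8 --nnodes 1 --node_rank 0 --master_addr localhost --master_port 6001"
CHECKPOINT_LORA="your lora model directory path"
python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \
    --tensor-model-parallel-size 8 \
    --pipeline-model-parallel-size 1 \
    --lora-load ${CHECKPOINT_LORA} \
    --lora-r 16 \
    --lora-alpha 32 \
    --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \
    --make-vocab-size-divisible-by 1 \
    --inference-prompt-type 'alpaca' \
    | tee logs/generate_llama2_13b_lora.log
```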
@@ -742,7 +742,7 @@ pip install -r requirements.txt source /usr/local/Ascend/ascend-toolkit/set_env.sh # convert to megatron weights - python tools/checkpoint/util.py \ + python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -758,7 +758,7 @@ pip install -r requirements.txt source /usr/local/Ascend/ascend-toolkit/set_env.sh # convert to megatron weights - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -775,7 +775,7 @@ pip install -r requirements.txt cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -789,7 +789,7 @@ pip install -r requirements.txt cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -931,16 +931,16 @@ pip install -r requirements.txt --lora-load ${LORA_CHECKPOINT} \ ``` - Launch LLAMA2-34B lora fine tune script: tasks/finetune/tune_llama2_34b_ptd.sh + Launch LLAMA2-34B lora fine tune script: examples/llama2/tune_llama2_34b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_34b_ptd.sh + bash examples/llama2/tune_llama2_34b_ptd.sh ``` - Launch LLAMA2-70B lora fine tune script: tasks/finetune/tune_llama2_70b_ptd.sh + Launch LLAMA2-70B lora fine tune script: examples/llama2/tune_llama2_70b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_70b_ptd.sh + bash examples/llama2/tune_llama2_70b_ptd.sh ``` ### Performance-2 @@ -965,9 +965,9 @@ Models could generate with 8 NPUs, for example: Config inference script: -LLaMA2-34B:`tasks/inference/generate_llama2_34B_ptd.sh`. +LLaMA2-34B:`examples/llama2/generate_llama2_34B_ptd.sh`. -LLaMA2-70B:`tasks/inference/generate_llama2_70b_ptd.sh`. +LLaMA2-70B:`examples/llama2/generate_llama2_70b_ptd.sh`. ```shell # Modify checkpoint path and vocabfile path. @@ -983,19 +983,19 @@ CHECKPOINT_LORA="your lora model directory path" Launch LLaMA2-34B inference: ```shell -bash ./tasks/inference/generate_llama2_34B_ptd.sh +bash ./examples/llama2/generate_llama2_34B_ptd.sh ``` Launch LLaMA2-34B lora inference: ```shell -bash ./tasks/inference/generate_llama2_34b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_34b_lora_ptd.sh ``` Launch LLaMA2-70B inference: ```shell -bash ./tasks/llama2/generate_llama2_70b_ptd.sh +bash ./examples/llama2/generate_llama2_70b_ptd.sh ``` Launch LLaMA2-70B lora inference: ```shell -bash ./tasks/llama2/generate_llama2_70b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_70b_lora_ptd.sh ``` Some inference samples of LLaMA2-34B are as follows: @@ -1012,9 +1012,9 @@ Download dev part[here](https://storage.googleapis.com/boolq/dev.jsonl) and put Config evaluation script: -LLaMA2-34B:`tasks/evaluation/evaluate_llama2_34B_ptd.sh`. +LLaMA2-34B:`examples/llama2/evaluate_llama2_34B_ptd.sh`. -LLaMA2-70B:`tasks/evaluation/evaluate_llama2_70B_ptd.sh`. +LLaMA2-70B:`examples/llama2/evaluate_llama2_70B_ptd.sh`. ```shell # Modify checkpoint path and vocabfile path. 
@@ -1024,11 +1024,11 @@ TOKENIZER_PATH= Launch LLaMA2-34B evaluation: ```shell -bash tasks/evaluation/evaluate_llama2_34B_ptd.sh +bash examples/llama2/evaluate_llama2_34B_ptd.sh ``` Launch LLaMA2-70B evaluation: ```shell -bash tasks/evaluation/evaluate_llama2_70B_ptd.sh +bash examples/llama2/evaluate_llama2_70B_ptd.sh ``` Evaluation results with BoolQ dataset: diff --git a/tasks/evaluation/evaluate_llama2_13B_ptd.sh b/examples/llama2/evaluate_llama2_13B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_llama2_13B_ptd.sh rename to examples/llama2/evaluate_llama2_13B_ptd.sh index 285c79ae30a..0244458ecac 100644 --- a/tasks/evaluation/evaluate_llama2_13B_ptd.sh +++ b/examples/llama2/evaluate_llama2_13B_ptd.sh @@ -22,7 +22,7 @@ TOKENIZER_PATH="Your tokenizer path" DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_llama2_34B_ptd.sh b/examples/llama2/evaluate_llama2_34B_ptd.sh similarity index 95% rename from tasks/evaluation/evaluate_llama2_34B_ptd.sh rename to examples/llama2/evaluate_llama2_34B_ptd.sh index d4749e1148f..dbd2f822787 100644 --- a/tasks/evaluation/evaluate_llama2_34B_ptd.sh +++ b/examples/llama2/evaluate_llama2_34B_ptd.sh @@ -22,7 +22,7 @@ TOKENIZER_PATH="Your tokenizer path" DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_llama2_70B_ptd.sh b/examples/llama2/evaluate_llama2_70B_ptd.sh similarity index 91% rename from tasks/evaluation/evaluate_llama2_70B_ptd.sh rename to examples/llama2/evaluate_llama2_70B_ptd.sh index a9a28f3eb1c..ffcafd9385a 100644 --- a/tasks/evaluation/evaluate_llama2_70B_ptd.sh +++ b/examples/llama2/evaluate_llama2_70B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. 
-python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_llama2_7B_ptd.sh b/examples/llama2/evaluate_llama2_7B_ptd.sh similarity index 95% rename from tasks/evaluation/evaluate_llama2_7B_ptd.sh rename to examples/llama2/evaluate_llama2_7B_ptd.sh index cbab4e2fce0..606e7b0687d 100644 --- a/tasks/evaluation/evaluate_llama2_7B_ptd.sh +++ b/examples/llama2/evaluate_llama2_7B_ptd.sh @@ -22,7 +22,7 @@ NODE_RANK=0 NPUS_PER_NODE=1 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" # configure generation parameters -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/inference/generate_llama2_13b_lora_ptd.sh b/examples/llama2/generate_llama2_13b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_13b_lora_ptd.sh rename to examples/llama2/generate_llama2_13b_lora_ptd.sh index c26ab51893d..2ae5c480fde 100644 --- a/tasks/inference/generate_llama2_13b_lora_ptd.sh +++ b/examples/llama2/generate_llama2_13b_lora_ptd.sh @@ -26,7 +26,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_alpaca.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ @@ -59,4 +59,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-r 16 \ --lora-alpha 32 \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama2_13b_lora.log diff --git a/tasks/inference/generate_llama2_13b_ptd.sh b/examples/llama2/generate_llama2_13b_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_13b_ptd.sh rename to examples/llama2/generate_llama2_13b_ptd.sh index 4f204067367..0dbe63c9eb6 100644 --- a/tasks/inference/generate_llama2_13b_ptd.sh +++ b/examples/llama2/generate_llama2_13b_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ diff --git a/tasks/inference/generate_llama2_34B_ptd.sh b/examples/llama2/generate_llama2_34B_ptd.sh similarity index 95% rename from tasks/inference/generate_llama2_34B_ptd.sh rename to examples/llama2/generate_llama2_34B_ptd.sh index ffacd7e2f4d..c89c331b546 100644 --- a/tasks/inference/generate_llama2_34B_ptd.sh +++ b/examples/llama2/generate_llama2_34B_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch 
$DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 48 \ diff --git a/tasks/inference/generate_llama2_34b_lora_ptd.sh b/examples/llama2/generate_llama2_34b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_34b_lora_ptd.sh rename to examples/llama2/generate_llama2_34b_lora_ptd.sh index 548ffc48582..2884896b00b 100644 --- a/tasks/inference/generate_llama2_34b_lora_ptd.sh +++ b/examples/llama2/generate_llama2_34b_lora_ptd.sh @@ -26,7 +26,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_alpaca.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 48 \ @@ -61,4 +61,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --make-vocab-size-divisible-by 1 \ --group-query-attention \ --num-query-groups 8 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama2_34b_lora.log diff --git a/tasks/inference/generate_llama2_70b_lora_ptd.sh b/examples/llama2/generate_llama2_70b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_70b_lora_ptd.sh rename to examples/llama2/generate_llama2_70b_lora_ptd.sh index a5f53db5b39..a1de86d11c6 100644 --- a/tasks/inference/generate_llama2_70b_lora_ptd.sh +++ b/examples/llama2/generate_llama2_70b_lora_ptd.sh @@ -20,7 +20,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 2 \ --num-layers 80 \ @@ -55,4 +55,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama2_70b_lora.log diff --git a/tasks/inference/generate_llama2_70b_ptd.sh b/examples/llama2/generate_llama2_70b_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_70b_ptd.sh rename to examples/llama2/generate_llama2_70b_ptd.sh index 0f5ba74a231..18af322850d 100644 --- a/tasks/inference/generate_llama2_70b_ptd.sh +++ b/examples/llama2/generate_llama2_70b_ptd.sh @@ -19,7 +19,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 80 \ diff --git a/tasks/inference/generate_llama2_7b_lora_ptd.sh b/examples/llama2/generate_llama2_7b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_7b_lora_ptd.sh rename to examples/llama2/generate_llama2_7b_lora_ptd.sh index 3aaf738edb7..e2e723729b9 100644 --- 
a/tasks/inference/generate_llama2_7b_lora_ptd.sh +++ b/examples/llama2/generate_llama2_7b_lora_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_alpaca.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ @@ -55,4 +55,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama2_7b_lora.log diff --git a/tasks/inference/generate_llama2_7b_ptd.sh b/examples/llama2/generate_llama2_7b_ptd.sh similarity index 95% rename from tasks/inference/generate_llama2_7b_ptd.sh rename to examples/llama2/generate_llama2_7b_ptd.sh index 3e4c5b5da14..23405e7ae3b 100644 --- a/tasks/inference/generate_llama2_7b_ptd.sh +++ b/examples/llama2/generate_llama2_7b_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ diff --git a/tasks/finetune/tune_llama2_13b_ptd.sh b/examples/llama2/tune_llama2_13b_ptd.sh similarity index 96% rename from tasks/finetune/tune_llama2_13b_ptd.sh rename to examples/llama2/tune_llama2_13b_ptd.sh index c3bde4a2c7c..0b78c404fb5 100644 --- a/tasks/finetune/tune_llama2_13b_ptd.sh +++ b/examples/llama2/tune_llama2_13b_ptd.sh @@ -47,10 +47,10 @@ GPT_ARGS=" --seq-length 4096 \ --max-position-embeddings 4096 \ --micro-batch-size 2 \ - --global-batch-size 16 \ + --global-batch-size 128 \ --make-vocab-size-divisible-by 1 \ --lr 1e-6 \ - --train-iters 200 \ + --train-iters 2000 \ --lr-decay-style cosine \ --untie-embeddings-and-output-weights \ --disable-bias-linear \ diff --git a/tasks/finetune/tune_llama2_34b_ptd.sh b/examples/llama2/tune_llama2_34b_ptd.sh similarity index 96% rename from tasks/finetune/tune_llama2_34b_ptd.sh rename to examples/llama2/tune_llama2_34b_ptd.sh index 2e69a9265ab..37c3a500668 100644 --- a/tasks/finetune/tune_llama2_34b_ptd.sh +++ b/examples/llama2/tune_llama2_34b_ptd.sh @@ -47,10 +47,10 @@ GPT_ARGS=" --seq-length 4096 \ --max-position-embeddings 4096 \ --micro-batch-size 2 \ - --global-batch-size 16 \ + --global-batch-size 128 \ --make-vocab-size-divisible-by 1 \ --lr 1.5e-4 \ - --train-iters 200 \ + --train-iters 2000 \ --lr-decay-style cosine \ --untie-embeddings-and-output-weights \ --disable-bias-linear \ diff --git a/tasks/finetune/tune_llama2_70b_ptd.sh b/examples/llama2/tune_llama2_70b_ptd.sh similarity index 100% rename from tasks/finetune/tune_llama2_70b_ptd.sh rename to examples/llama2/tune_llama2_70b_ptd.sh diff --git a/tasks/finetune/tune_llama2_7b_ptd.sh b/examples/llama2/tune_llama2_7b_ptd.sh similarity index 94% rename from tasks/finetune/tune_llama2_7b_ptd.sh rename to examples/llama2/tune_llama2_7b_ptd.sh index b24b5068b41..6fe08e1c95a 100644 --- a/tasks/finetune/tune_llama2_7b_ptd.sh +++ 
b/examples/llama2/tune_llama2_7b_ptd.sh @@ -3,8 +3,6 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 export NPU_ASD_ENABLE=0 -source "./envs/ascend-toolkit/set_env.sh" - GPUS_PER_NODE=8 MASTER_ADDR=localhost MASTER_PORT=6001 @@ -45,10 +43,10 @@ GPT_ARGS=" --seq-length 4096 \ --max-position-embeddings 4096 \ --micro-batch-size 4 \ - --global-batch-size 16 \ + --global-batch-size 128 \ --make-vocab-size-divisible-by 1 \ --lr 1.25e-6 \ - --train-iters 200 \ + --train-iters 2000 \ --lr-decay-style cosine \ --untie-embeddings-and-output-weights \ --disable-bias-linear \ diff --git a/examples/mixtral/README.md b/examples/mixtral/README.md index 9582383611d..7778d9109bc 100644 --- a/examples/mixtral/README.md +++ b/examples/mixtral/README.md @@ -82,7 +82,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_hf \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1 \ @@ -133,7 +133,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # HF 转 tp1-pp8-ep2 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_hf \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1 \ @@ -151,7 +151,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # tp1-pp8-ep2 转 tp1-pp8-ep1 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_mg \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1-ep2-pp8 \ @@ -167,7 +167,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # tp1-pp8-ep2 转 HF - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_mg \ --saver mixtral \ --save-model-type huggingface \ @@ -233,6 +233,7 @@ python ./tools/preprocess_data.py \ Mixtral-8x7B 在双机16卡上(ep2 pp8) **昇腾芯片** 和 **参考芯片** 上的性能对比: *(当节点够多的情况下,ep越大吞吐越大,这里并非为最佳性能,仅供参考)* + | 设备 | 模型 | 迭代数 | 样本吞吐 (samples/step) | tokens吞吐 (tokens/s/p) | 单步迭代时间 (s/step) | | :--: | :-------: | :----: |:-------------------:|:---------------------:|:-------------------: | | NPUs | Mixtral-8x7B | 1000 | 4.11 | 1053.6 | 31.13 | @@ -241,7 +242,7 @@ Mixtral-8x7B 在双机16卡上(ep2 pp8) **昇腾芯片** 和 **参考芯片** ## 模型推理 -首先需要配置推理脚本: ***tasks/inference/generate_mixtral_8x7b_ptd.sh*** +首先需要配置推理脚本: ***examples/mixtral/generate_mixtral_8x7b_ptd.sh*** ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -264,13 +265,13 @@ PP=1 该文档中使用的Mixtral-8x7B-v0.1模型为L0模型,只有续写能力,推理不带任何模板并且容易出现重复或回答不停止的情况。 如若想要有较好的人机对话能力,请使用Mixtral-8x7B-Instruct-v0.1模型,该模型为指令遵从度训练后需要配合模板使用,基本操作同上,仅启动入口有变化: -torchrun $DISTRIBUTED_ARGS tasks/inference/inference_mixtral.py +torchrun $DISTRIBUTED_ARGS inference.py ``` 然后可直接启动 ```bash -bash tasks/inference/generate_mixtral_8x7b_ptd.sh +bash examples/mixtral/generate_mixtral_8x7b_ptd.sh ``` 推理的示例如下: @@ -279,7 +280,7 @@ bash tasks/inference/generate_mixtral_8x7b_ptd.sh ## 模型评估 使用 MMLU数据集评估模型. 数据集下载路径 [这里](https://huggingface.co/datasets/cais/mmlu). 
-配置评估脚本: tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh +配置评估脚本: examples/mixtral/evaluate_mixtral_8x7b_ptd.sh ```bash # ascend-toolkit 路径 @@ -296,7 +297,7 @@ TASK="mmlu" 启动评估 ```bash -bash tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh +bash examples/mixtral/evaluate_mixtral_8x7b_ptd.sh ``` 评估结果如下 diff --git a/examples/mixtral/README_en.md b/examples/mixtral/README_en.md index 56787af90f0..9c5315d4189 100644 --- a/examples/mixtral/README_en.md +++ b/examples/mixtral/README_en.md @@ -81,7 +81,7 @@ Recommended hardware configuration for inference: source /usr/local/Ascend/ascend-toolkit/set_env.sh # Convert weight format - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_hf \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1 \ @@ -132,7 +132,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # HF to tp1-pp8-ep2 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_hf \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1 \ @@ -150,7 +150,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # tp1-pp8-ep2 to tp1-pp8-ep1 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_mg \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1-ep2-pp8 \ @@ -166,7 +166,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # tp1-pp8-ep2 to HF - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_mg \ --saver mixtral \ --save-model-type huggingface \ @@ -240,7 +240,7 @@ Comparison of Mixtral-8x7B performance on 2 nodes and 16 chips with ep2 pp8: ## Model-Inference -First, configure the inference script: ***tasks/inference/generate_mixtral_8x7b_ptd.sh*** +First, configure the inference script: ***examples/mixtral/generate_mixtral_8x7b_ptd.sh*** ```bash # Execute set_env.sh according to your own ascend-toolkit path @@ -263,13 +263,13 @@ PP=1 The Mixtral-8x7B-v0.1 model used in this document is an L0 model, only with continuation ability, inference does not involve any templates and is prone to repetition or non-stop answering. If you want to have better human-machine dialogue capabilities, please use the Mixtral-8x7B-Instruct-v0.1 model. This model requires instruction compliance training and needs to be used with templates. The basic operations are the same as above, only the startup entry has changed: -torchrun $DISTRIBUTED_ARGS tasks/inference/inference_mixtral.py +torchrun $DISTRIBUTED_ARGS inference.py ``` Then you can start it directly ```bash -bash tasks/inference/generate_mixtral_8x7b_ptd.sh +bash examples/mixtral/generate_mixtral_8x7b_ptd.sh ``` An example of inference is as follows: @@ -278,7 +278,7 @@ An example of inference is as follows: ## Model-Evaluation Evaluate the model using the MMLU dataset. Dataset download path [here](https://huggingface.co/datasets/cais/mmlu). 
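To make the scoring idea concrete, here is a minimal, illustrative sketch of how an MMLU-style evaluation turns per-subject question files into an accuracy number. It is not the repository's implementation (after this change that lives in `modellink/tasks/evaluation/eval_impl/mmlu_eval.py`); the CSV layout, the few-shot prefix, and the `ask_model` callable are assumptions made only for the example.

```python
# Minimal sketch of an MMLU-style scoring loop. Assumes the dataset was extracted
# to per-subject CSV files with columns: question, A, B, C, D, answer.
# NOT the repository's implementation; `ask_model` stands in for whatever
# generates an answer letter for a prompt.
import glob
from typing import Callable

import pandas as pd

FEW_SHOT_PREFIX = "The following are multiple choice questions (with answers).\n\n"


def score_mmlu(data_dir: str, ask_model: Callable[[str], str]) -> float:
    correct, total = 0, 0
    for csv_path in glob.glob(f"{data_dir}/*.csv"):
        frame = pd.read_csv(csv_path)
        for _, row in frame.iterrows():
            prompt = (
                FEW_SHOT_PREFIX
                + f"{row['question']}\n"
                + "\n".join(f"{c}. {row[c]}" for c in "ABCD")
                + "\nAnswer:"
            )
            prediction = ask_model(prompt).strip()[:1].upper()  # keep only the first letter
            correct += prediction == str(row["answer"]).strip().upper()
            total += 1
    return correct / total if total else 0.0  # guard against empty data
```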
-Configure the evaluation script: ***tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh*** +Configure the evaluation script: ***examples/mixtral/evaluate_mixtral_8x7b_ptd.sh*** ```bash # Ascend-toolkit path @@ -296,7 +296,7 @@ TASK="mmlu" Start the evaluation ```bash -bash tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh +bash examples/mixtral/evaluate_mixtral_8x7b_ptd.sh ``` The evaluation results are as follows diff --git a/tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh b/examples/mixtral/evaluate_mixtral_8x7b_ptd.sh similarity index 96% rename from tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh rename to examples/mixtral/evaluate_mixtral_8x7b_ptd.sh index a9f5c31faf6..1e43e25afc1 100644 --- a/tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh +++ b/examples/mixtral/evaluate_mixtral_8x7b_ptd.sh @@ -66,7 +66,7 @@ MOE_ARGS=" --moe-train-capacity-factor 8.0 " -torchrun $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +torchrun $DISTRIBUTED_ARGS evaluation.py \ $GPT_ARGS \ $MOE_ARGS \ --distributed-backend nccl | tee logs/evaluation_mixtral_${TASK}.log diff --git a/tasks/inference/generate_mixtral_8x7b_ptd.sh b/examples/mixtral/generate_mixtral_8x7b_ptd.sh similarity index 95% rename from tasks/inference/generate_mixtral_8x7b_ptd.sh rename to examples/mixtral/generate_mixtral_8x7b_ptd.sh index 03252f0ab7e..eda91d14ed9 100644 --- a/tasks/inference/generate_mixtral_8x7b_ptd.sh +++ b/examples/mixtral/generate_mixtral_8x7b_ptd.sh @@ -62,8 +62,9 @@ MOE_ARGS=" --moe-train-capacity-factor 8.0 " -torchrun $DISTRIBUTED_ARGS tasks/inference/inference_llama.py \ +torchrun $DISTRIBUTED_ARGS inference.py \ $GPT_ARGS \ $MOE_ARGS \ --distributed-backend nccl \ + --inference-prompt-type 'mixtral' \ | tee logs/generate_mixtral.log diff --git a/examples/qwen/README.md b/examples/qwen/README.md index 57f26d94fd2..c14415bc48c 100644 --- a/examples/qwen/README.md +++ b/examples/qwen/README.md @@ -118,7 +118,7 @@ Qwen-7B 训练的硬件配置: # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -135,7 +135,7 @@ Qwen-7B 训练的硬件配置: cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_qwen \ @@ -203,7 +203,7 @@ Qwen-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -配置 qwen-7b 推理脚本:tasks/inference/generate_qwen_7b_ptd.sh +配置 qwen-7b 推理脚本:examples/qwen/generate_qwen_7b_ptd.sh ```bash # ascend-toolkit 路径 @@ -216,7 +216,7 @@ TOKENIZER_PATH="your tokenizer directory path" 启动qwen-7b推理脚本 ```bash -bash tasks/inference/generate_qwen_7b_ptd.sh +bash examples/qwen/generate_qwen_7b_ptd.sh ``` 推理示例如下: @@ -227,7 +227,7 @@ bash tasks/inference/generate_qwen_7b_ptd.sh 使用[CEval数据集](https://huggingface.co/datasets/ceval/ceval-exam)和[MMLU数据集](https://huggingface.co/datasets/cais/mmlu)评估模型.
-配置qwen-7b评估脚本: tasks/evaluation/evaluate_qwen_7b_ptd.sh +配置qwen-7b评估脚本: examples/qwen/evaluate_qwen_7b_ptd.sh ```bash # ascend-toolkit 路径 @@ -245,7 +245,7 @@ TASK="mmlu" # ceval任务配置为 "ceval" 启动评估 ```bash -bash tasks/evaluation/evaluate_qwen_7b_ptd.sh +bash examples/qwen/evaluate_qwen_7b_ptd.sh ``` | 数据集 | 总学科数 | 总问题数 | 参考准确率 | NPU准确率 | @@ -348,7 +348,7 @@ Qwen-14B 训练的硬件配置: # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -363,7 +363,7 @@ Qwen-14B 训练的硬件配置: cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_qwen \ @@ -430,7 +430,7 @@ Qwen-14B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -配置 qwen-14b 推理脚本:tasks/inference/generate_qwen_14b_ptd.sh +配置 qwen-14b 推理脚本:examples/qwen/generate_qwen_14b_ptd.sh ```bash # ascend-toolkit 路径 @@ -443,7 +443,7 @@ TOKENIZER_PATH=./qwen-14b-hf 启动qwen-14b推理脚本 ```bash -bash tasks/inference/generate_qwen_14b_ptd.sh +bash examples/qwen/generate_qwen_14b_ptd.sh ``` 推理示例如下: @@ -454,7 +454,7 @@ bash tasks/inference/generate_qwen_14b_ptd.sh 使用[CEval数据集](https://huggingface.co/datasets/ceval/ceval-exam)和[MMLU数据集](https://huggingface.co/datasets/cais/mmlu)评估模型. -配置qwen-14b评估脚本: tasks/evaluation/evaluate_qwen_14b_ptd.sh +配置qwen-14b评估脚本: examples/qwen/evaluate_qwen_14b_ptd.sh ```bash # ascend-toolkit 路径 @@ -471,7 +471,7 @@ TASK="mmlu" # ceval任务配置为 "ceval" 启动评估 ```bash -bash tasks/evaluation/evaluate_qwen_14b_ptd.sh +bash examples/qwen/evaluate_qwen_14b_ptd.sh ``` | 数据集 | 总学科数 | 总问题数 | 参考准确率 | NPU准确率 | @@ -557,7 +557,7 @@ Qwen-72B 训练的硬件配置: # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -573,7 +573,7 @@ Qwen-72B 训练的硬件配置: cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_qwen \ @@ -650,7 +650,7 @@ Qwen-72B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -配置 qwen-72b 推理脚本:tasks/inference/generate_qwen_72b_ptd.sh +配置 qwen-72b 推理脚本:examples/qwen/generate_qwen_72b_ptd.sh ```bash # ascend-toolkit 路径 @@ -663,7 +663,7 @@ TOKENIZER_PATH=./qwen-72b-hf 启动qwen-72b推理脚本 ```bash -bash tasks/inference/generate_qwen_72b_ptd.sh +bash examples/qwen/generate_qwen_72b_ptd.sh ``` 推理示例如下: @@ -674,7 +674,7 @@ bash tasks/inference/generate_qwen_72b_ptd.sh 使用[CEval数据集](https://huggingface.co/datasets/ceval/ceval-exam)和[MMLU数据集](https://huggingface.co/datasets/cais/mmlu)评估模型. 
-配置qwen-72b评估脚本: tasks/evaluation/evaluate_qwen_72b_ptd.sh +配置qwen-72b评估脚本: examples/qwen/evaluate_qwen_72b_ptd.sh ```bash # ascend-toolkit 路径 @@ -692,7 +692,7 @@ TASK="mmlu" # ceval任务配置为 "ceval" 启动评估 ```bash -bash tasks/evaluation/evaluate_qwen_72b_ptd.sh +bash examples/qwen/evaluate_qwen_72b_ptd.sh ``` | 数据集 | 总学科数 | 总问题数 | 参考准确率 | NPU准确率 | diff --git a/examples/qwen/README_en.md b/examples/qwen/README_en.md index 90fe6351d43..9d0de0d0655 100644 --- a/examples/qwen/README_en.md +++ b/examples/qwen/README_en.md @@ -120,7 +120,7 @@ Here's a hardware summary of pre-training Qwen-7B: # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -136,7 +136,7 @@ Here's a hardware summary of pre-training Qwen-7B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -198,7 +198,7 @@ The performance of Qwen-7B in **Ascend NPU** and **Reference**: | Reference | Qwen-7B | 2867 | ## Inference -Config qwen-7b inference script: tasks/inference/generate_qwen_7b_ptd.sh +Config qwen-7b inference script: examples/qwen/generate_qwen_7b_ptd.sh ```bash # ascend-toolkit path @@ -209,9 +209,9 @@ CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch qwen-7b inference script: tasks/inference/generate_qwen_7b_ptd.sh +Launch qwen-7b inference script: examples/qwen/generate_qwen_7b_ptd.sh ```bash -bash tasks/inference/generate_qwen_7b_ptd.sh +bash examples/qwen/generate_qwen_7b_ptd.sh ``` Some inference samples are as follows: @@ -221,7 +221,7 @@ Some inference samples are as follows: ## Evaluation We use the [CEval benchmark](https://huggingface.co/datasets/ceval/ceval-exam) and [MMLU benchmark](https://huggingface.co/datasets/cais/mmlu) to evaluate our model. 
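The relocated top-level `evaluation.py` takes its task configuration from the options introduced later in this patch (`modellink/tasks/evaluation/utils.py`); `--task-data-path` and `--task` are declared with `nargs='*'`, so dataset folders and task names are passed as parallel lists. The stand-alone snippet below only mirrors those declarations for illustration; the dataset path is a placeholder.

```python
# Illustration of the evaluation options this change introduces; the real
# declarations live in modellink/tasks/evaluation/utils.py.
import argparse

parser = argparse.ArgumentParser(description="evaluation options (illustrative only)")
parser.add_argument("--task-data-path", nargs="*", default=[],
                    help="One or more dataset folders")
parser.add_argument("--task", nargs="*", default=[],
                    help="Task name(s), e.g. mmlu, ceval, boolq, gsm8k")
parser.add_argument("--max-new-tokens", type=int, default=128,
                    help="Generation budget; multiple-choice tasks need far fewer tokens than gsm8k")

# Placeholder path; point this at wherever the dataset was extracted.
args = parser.parse_args(["--task-data-path", "./mmlu/data/test/", "--task", "mmlu"])
print(args.task_data_path, args.task, args.max_new_tokens)
```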
-Config qwen-7b evaluation script: tasks/evaluation/evaluate_qwen_7b_ptd.sh +Config qwen-7b evaluation script: examples/qwen/evaluate_qwen_7b_ptd.sh ```bash # ascend-toolkit path @@ -239,7 +239,7 @@ TASK="mmlu" # "ceval" for ceval task Launch qwen-7b evaluation ```bash -bash ./tasks/evaluation/evaluate_qwen_7b_ptd.sh +bash examples/qwen/evaluate_qwen_7b_ptd.sh ``` | Task | Subset | Question | OpenSource | NPU | @@ -345,7 +345,7 @@ Here's a hardware summary of pre-training Qwen-14B: # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -360,7 +360,7 @@ Here's a hardware summary of pre-training Qwen-14B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -423,7 +423,7 @@ The performance of Qwen-14B in **Ascend NPU** and **Reference**: ## Inference -Config qwen-14b inference script: tasks/inference/generate_qwen_14b_ptd.sh +Config qwen-14b inference script: examples/qwen/generate_qwen_14b_ptd.sh ```bash # ascend-toolkit path @@ -434,9 +434,9 @@ CHECKPOINT="your model directory path" TOKENIZER_PATH=./qwen-14b-hf ``` -Launch qwen-14b inference script: tasks/inference/generate_qwen_14b_ptd.sh +Launch qwen-14b inference script: examples/qwen/generate_qwen_14b_ptd.sh ```bash -bash tasks/inference/generate_qwen_7b_ptd.sh +bash examples/qwen/generate_qwen_14b_ptd.sh ``` Some inference samples are as follows: @@ -447,7 +447,7 @@ Some inference samples are as follows: We use the [CEval benchmark](https://huggingface.co/datasets/ceval/ceval-exam) and [MMLU benchmark](https://huggingface.co/datasets/cais/mmlu) to evaluate our model.
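The generate scripts referenced above now all route through the top-level `inference.py`, which chooses a prompt wrapper via the new `--inference-prompt-type` option (default `'llama'`, i.e. the raw instruction; `'alpaca'` and `'mixtral'` add instruction templates). A minimal sketch of that dispatch, mirroring the templates added to `inference.py` in this change, is shown below; the sample instruction is only an illustration.

```python
# Sketch of the prompt wrapping selected by --inference-prompt-type, mirroring
# the templates added to inference.py in this change. The sample instruction is
# illustrative only.
def build_prompt(instruction: str, prompt_type: str = "llama") -> str:
    system_template = ""
    dialog_template = "{instruction}"  # default: pass the text through unchanged

    if prompt_type == "alpaca":
        system_template = (
            "Below is an instruction that describes a task, paired with an input that "
            "provides further context. Write a response that appropriately completes "
            "the request. Please note that you need to think through your response "
            "logically and step by step.\n\n"
        )
        dialog_template = "### Instruction:\n{instruction}\n\n### Response:"
    elif prompt_type == "mixtral":
        dialog_template = "[INST] {instruction} [/INST] "

    return system_template + dialog_template.format(instruction=instruction)


print(build_prompt("Summarize tensor parallelism in one sentence.", "alpaca"))
```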
-Config qwen-14b evaluation script: tasks/evaluation/evaluate_qwen_14b_ptd.sh +Config qwen-14b evaluation script: examples/qwen/evaluate_qwen_14b_ptd.sh ```bash # ascend-toolkit path @@ -465,7 +465,7 @@ TASK="mmlu" # "ceval" for ceval task Launch qwen-14b evaluation ```bash -bash ./tasks/evaluation/evaluate_qwen_14b_ptd.sh +bash examples/qwen/evaluate_qwen_14b_ptd.sh ``` | Task | Subset | Question | OpenSource | NPU | @@ -551,7 +551,7 @@ Here's a hardware summary of pre-training Qwen-72B: # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -567,7 +567,7 @@ Here's a hardware summary of pre-training Qwen-72B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -640,7 +640,7 @@ The performance of Qwen-72B in **Ascend NPU** and **Reference**: ## Inference -Config qwen-72b inference script: tasks/inference/generate_qwen_72b_ptd.sh +Config qwen-72b inference script: examples/qwen/generate_qwen_72b_ptd.sh ```bash # ascend-toolkit path @@ -651,9 +651,9 @@ CHECKPOINT="your model directory path" TOKENIZER_PATH=./qwen-72b-hf ``` -Launch qwen-72b inference script: tasks/inference/generate_qwen_72b_ptd.sh +Launch qwen-72b inference script: examples/qwen/generate_qwen_72b_ptd.sh ```bash -bash tasks/inference/generate_qwen_72b_ptd.sh +bash examples/qwen/generate_qwen_72b_ptd.sh ``` Some inference samples are as follows: @@ -663,7 +663,7 @@ Some inference samples are as follows: ## Evaluation We use the [CEval benchmark](https://huggingface.co/datasets/ceval/ceval-exam) and [MMLU benchmark](https://huggingface.co/datasets/cais/mmlu) to evaluate our model. -Config qwen-72b evaluation script: tasks/evaluation/evaluate_qwen_72b_ptd.sh +Config qwen-72b evaluation script: examples/qwen/evaluate_qwen_72b_ptd.sh ```bash # ascend-toolkit path @@ -681,7 +681,7 @@ TASK="mmlu" # "ceval" for ceval task Launch qwen-72b evaluation ```bash -bash ./tasks/evaluation/evaluate_qwen_72b_ptd.sh +bash examples/qwen/evaluate_qwen_72b_ptd.sh ``` | Task | Subset | Question | OpenSource | NPU | diff --git a/tasks/evaluation/evaluate_qwen_14b_ptd.sh b/examples/qwen/evaluate_qwen_14b_ptd.sh similarity index 96% rename from tasks/evaluation/evaluate_qwen_14b_ptd.sh rename to examples/qwen/evaluate_qwen_14b_ptd.sh index 1a7cce307aa..decf4062382 100644 --- a/tasks/evaluation/evaluate_qwen_14b_ptd.sh +++ b/examples/qwen/evaluate_qwen_14b_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./mmlu/data/test" TASK="mmlu" # Different task needs different max_new_tokens value, please follow the instruction in readme. 
-torchrun $DISTRIBUTED_ARGS ./tasks/evaluation/evaluation_llama.py \ +torchrun $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/evaluation/evaluate_qwen_72b_ptd.sh b/examples/qwen/evaluate_qwen_72b_ptd.sh similarity index 96% rename from tasks/evaluation/evaluate_qwen_72b_ptd.sh rename to examples/qwen/evaluate_qwen_72b_ptd.sh index d0a3128b680..9704dfa8198 100644 --- a/tasks/evaluation/evaluate_qwen_72b_ptd.sh +++ b/examples/qwen/evaluate_qwen_72b_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./mmlu/data/test" TASK="mmlu" # Different task needs different max_new_tokens value, please follow the instruction in readme. -torchrun $DISTRIBUTED_ARGS ./tasks/evaluation/evaluation_llama.py \ +torchrun $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 8192 \ diff --git a/tasks/evaluation/evaluate_qwen_7b_ptd.sh b/examples/qwen/evaluate_qwen_7b_ptd.sh similarity index 96% rename from tasks/evaluation/evaluate_qwen_7b_ptd.sh rename to examples/qwen/evaluate_qwen_7b_ptd.sh index 77497e8eea7..6ff7aa03576 100644 --- a/tasks/evaluation/evaluate_qwen_7b_ptd.sh +++ b/examples/qwen/evaluate_qwen_7b_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./mmlu/data/test" TASK="mmlu" # Different task needs different max_new_tokens value, please follow the instruction in readme. -torchrun $DISTRIBUTED_ARGS ./tasks/evaluation/evaluation_llama.py \ +torchrun $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task ${TASK}\ --seq-length 8192 \ diff --git a/tasks/inference/generate_qwen_14b_ptd.sh b/examples/qwen/generate_qwen_14b_ptd.sh similarity index 96% rename from tasks/inference/generate_qwen_14b_ptd.sh rename to examples/qwen/generate_qwen_14b_ptd.sh index 8ad2da74c57..d06fa3a9330 100644 --- a/tasks/inference/generate_qwen_14b_ptd.sh +++ b/examples/qwen/generate_qwen_14b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -torchrun $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +torchrun $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ diff --git a/tasks/inference/generate_qwen_72b_ptd.sh b/examples/qwen/generate_qwen_72b_ptd.sh similarity index 96% rename from tasks/inference/generate_qwen_72b_ptd.sh rename to examples/qwen/generate_qwen_72b_ptd.sh index 678f4209017..03763108814 100644 --- a/tasks/inference/generate_qwen_72b_ptd.sh +++ b/examples/qwen/generate_qwen_72b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -torchrun $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +torchrun $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 80 \ diff --git a/tasks/inference/generate_qwen_7b_ptd.sh b/examples/qwen/generate_qwen_7b_ptd.sh similarity index 96% rename from tasks/inference/generate_qwen_7b_ptd.sh rename to examples/qwen/generate_qwen_7b_ptd.sh index 54c0e8776e8..1f067c21abc 100644 --- a/tasks/inference/generate_qwen_7b_ptd.sh +++ b/examples/qwen/generate_qwen_7b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR 
--master_port $MASTER_PORT" -torchrun $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +torchrun $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ diff --git a/tasks/inference/inference_alpaca.py b/inference.py similarity index 68% rename from tasks/inference/inference_alpaca.py rename to inference.py index c44c46e5973..3f0da94ba56 100644 --- a/tasks/inference/inference_alpaca.py +++ b/inference.py @@ -20,7 +20,7 @@ from megatron import get_args from megatron.model import GPTModel from megatron.initialize import initialize_megatron from megatron.arguments import core_transformer_config_from_args -from tasks.inference.text_generation.infer_base import task_factory, add_text_generate_args +from modellink.tasks.inference.text_generation.infer_base import task_factory, add_text_generate_args def model_provider(pre_process=True, post_process=True): @@ -47,10 +47,17 @@ if __name__ == "__main__": pretrained_model_name_or_path=args.load ) - system_template = "Below is an instruction that describes a task, paired with an input that provides further " \ - "context. Write a response that appropriately completes the request. " \ - "Please note that you need to think through your response logically and step by step.\n\n" - dialog_template = "### Instruction:\n{instruction}\n\n### Response:" - template = system_template + dialog_template + system_template = "" + dialog_template = "{instruction}" + + if args.inference_prompt_type == 'alpaca': + system_template = "Below is an instruction that describes a task, paired with an input that provides further " \ + "context. Write a response that appropriately completes the request. " \ + "Please note that you need to think through your response logically and step by step.\n\n" + dialog_template = "### Instruction:\n{instruction}\n\n### Response:" + + elif args.inference_prompt_type == 'mixtral': + system_template = "" + dialog_template = "[INST] {instruction} [/INST] " task_factory(args, model, system_template=system_template, dialog_template=dialog_template) diff --git a/modellink/checkpointing.py b/modellink/checkpointing.py index 74cda6e0aa1..b8be1adcfa6 100644 --- a/modellink/checkpointing.py +++ b/modellink/checkpointing.py @@ -18,7 +18,7 @@ import os from functools import wraps from megatron.checkpointing import _load_base_checkpoint from megatron import get_args -from tasks.finetune.lora.utils import is_enable_lora, merge_dicts, modify_keys_with_dict +from .tasks.finetune.lora.utils import is_enable_lora, merge_dicts, modify_keys_with_dict def _load_base_checkpoint_wrapper(fn): diff --git a/modellink/model/gpt_model.py b/modellink/model/gpt_model.py index 733e10e56fe..2537722ebc0 100644 --- a/modellink/model/gpt_model.py +++ b/modellink/model/gpt_model.py @@ -18,7 +18,7 @@ from megatron.model.module import MegatronModule from megatron.model.enums import AttnMaskType from megatron.model.language_model import get_language_model from megatron.model.gpt_model import post_language_model_processing -from tasks.inference.text_generation import MegatronModuleForCausalLM +from ..tasks.inference.text_generation import MegatronModuleForCausalLM class GPTModel(MegatronModule, MegatronModuleForCausalLM): diff --git a/modellink/model/transformer.py b/modellink/model/transformer.py index f81e447a670..87ff0d20d7f 100644 --- a/modellink/model/transformer.py +++ b/modellink/model/transformer.py @@ -34,7 +34,7 @@ from megatron.model.utils import get_norm from modellink.error_utils import 
ensure_valid from modellink.model.alibi import Alibi, _build_alibi_tensor, _get_inverted_mask -from tasks.finetune.lora.utils import is_enable_lora +from ..tasks.finetune.lora.utils import is_enable_lora try: from einops import rearrange diff --git a/modellink/tasks/__init__.py b/modellink/tasks/__init__.py new file mode 100644 index 00000000000..9a1307aeca2 --- /dev/null +++ b/modellink/tasks/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/tasks/evaluation/eval_impl/template.py b/modellink/tasks/evaluation/__init__.py similarity index 55% rename from tasks/evaluation/eval_impl/template.py rename to modellink/tasks/evaluation/__init__.py index 75f77cf3a60..aaf493892e8 100644 --- a/tasks/evaluation/eval_impl/template.py +++ b/modellink/tasks/evaluation/__init__.py @@ -12,10 +12,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -GSM8K_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json' -MMLU_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json' -CEVAL_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json' -AGIEVAL_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json' -BBH_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson' -CODE_TEST_LOG_DIR = 'tasks/evaluation/codecheck_log' diff --git a/modellink/tasks/evaluation/eval_api/__init__.py b/modellink/tasks/evaluation/eval_api/__init__.py new file mode 100644 index 00000000000..9a1307aeca2 --- /dev/null +++ b/modellink/tasks/evaluation/eval_api/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
\ No newline at end of file diff --git a/tasks/evaluation/eval_api/chat.py b/modellink/tasks/evaluation/eval_api/chat.py similarity index 100% rename from tasks/evaluation/eval_api/chat.py rename to modellink/tasks/evaluation/eval_api/chat.py diff --git a/tasks/evaluation/eval_api/dataset_eval.py b/modellink/tasks/evaluation/eval_api/dataset_eval.py similarity index 96% rename from tasks/evaluation/eval_api/dataset_eval.py rename to modellink/tasks/evaluation/eval_api/dataset_eval.py index 02efe6fb682..bc5e42e7abf 100644 --- a/tasks/evaluation/eval_api/dataset_eval.py +++ b/modellink/tasks/evaluation/eval_api/dataset_eval.py @@ -14,7 +14,7 @@ # limitations under the License. from abc import abstractmethod, ABCMeta -from tasks.evaluation.eval_api.chat import Chat +from .chat import Chat class DatasetEval(metaclass=ABCMeta): diff --git a/modellink/tasks/evaluation/eval_impl/__init__.py b/modellink/tasks/evaluation/eval_impl/__init__.py new file mode 100644 index 00000000000..9a1307aeca2 --- /dev/null +++ b/modellink/tasks/evaluation/eval_impl/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/tasks/evaluation/eval_impl/agi_eval.py b/modellink/tasks/evaluation/eval_impl/agi_eval.py similarity index 95% rename from tasks/evaluation/eval_impl/agi_eval.py rename to modellink/tasks/evaluation/eval_impl/agi_eval.py index 55ab368162c..9d8c3f30972 100644 --- a/tasks/evaluation/eval_impl/agi_eval.py +++ b/modellink/tasks/evaluation/eval_impl/agi_eval.py @@ -17,10 +17,10 @@ import logging import json import pandas as pd import tqdm -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import AGIEVAL_TEMPLATE_DIR -from modellink.error_utils import check_divisible_by_zero +from .template import AGIEVAL_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/bbh_eval.py b/modellink/tasks/evaluation/eval_impl/bbh_eval.py similarity index 94% rename from tasks/evaluation/eval_impl/bbh_eval.py rename to modellink/tasks/evaluation/eval_impl/bbh_eval.py index 761346d3d1b..9ab8eefd60d 100644 --- a/tasks/evaluation/eval_impl/bbh_eval.py +++ b/modellink/tasks/evaluation/eval_impl/bbh_eval.py @@ -17,10 +17,11 @@ import logging import json import pandas as pd import tqdm -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import BBH_TEMPLATE_DIR -from modellink.error_utils import check_divisible_by_zero + +from .template import BBH_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero logger = 
logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/boolq_eval.py b/modellink/tasks/evaluation/eval_impl/boolq_eval.py similarity index 95% rename from tasks/evaluation/eval_impl/boolq_eval.py rename to modellink/tasks/evaluation/eval_impl/boolq_eval.py index ecc65cf11a9..c81fd59909a 100644 --- a/tasks/evaluation/eval_impl/boolq_eval.py +++ b/modellink/tasks/evaluation/eval_impl/boolq_eval.py @@ -19,9 +19,10 @@ import json import pandas as pd import tqdm -from modellink.error_utils import check_divisible_by_zero -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero + logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/ceval_exam.py b/modellink/tasks/evaluation/eval_impl/ceval_exam.py similarity index 94% rename from tasks/evaluation/eval_impl/ceval_exam.py rename to modellink/tasks/evaluation/eval_impl/ceval_exam.py index 2fb0902d940..2c3a2821bb5 100644 --- a/tasks/evaluation/eval_impl/ceval_exam.py +++ b/modellink/tasks/evaluation/eval_impl/ceval_exam.py @@ -18,10 +18,11 @@ import json import pandas as pd import tqdm -from modellink.error_utils import check_divisible_by_zero -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import CEVAL_TEMPLATE_DIR +from .template import CEVAL_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero + logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json b/modellink/tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json rename to modellink/tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json diff --git a/tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson b/modellink/tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson rename to modellink/tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson diff --git a/tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json b/modellink/tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json rename to modellink/tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json diff --git a/tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json b/modellink/tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json rename to modellink/tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json diff --git a/tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json b/modellink/tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json rename to modellink/tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json diff --git a/tasks/evaluation/eval_impl/gsm8k_eval.py b/modellink/tasks/evaluation/eval_impl/gsm8k_eval.py 
similarity index 95% rename from tasks/evaluation/eval_impl/gsm8k_eval.py rename to modellink/tasks/evaluation/eval_impl/gsm8k_eval.py index 5f7e9e46b56..1a9d825c52a 100644 --- a/tasks/evaluation/eval_impl/gsm8k_eval.py +++ b/modellink/tasks/evaluation/eval_impl/gsm8k_eval.py @@ -19,10 +19,11 @@ import logging import json import pandas as pd import tqdm -from modellink.error_utils import check_divisible_by_zero -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import GSM8K_TEMPLATE_DIR + +from .template import GSM8K_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/human_eval.py b/modellink/tasks/evaluation/eval_impl/human_eval.py similarity index 94% rename from tasks/evaluation/eval_impl/human_eval.py rename to modellink/tasks/evaluation/eval_impl/human_eval.py index 268ffd9cd20..f86054f678e 100644 --- a/tasks/evaluation/eval_impl/human_eval.py +++ b/modellink/tasks/evaluation/eval_impl/human_eval.py @@ -16,17 +16,17 @@ import json import os import logging -import stat import re import sys import subprocess from typing import Iterable, Dict import pandas as pd -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import CODE_TEST_LOG_DIR -from modellink.error_utils import check_divisible_by_zero -from modellink.utils import WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES + +from .template import CODE_TEST_LOG_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero +from ....utils import WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/mmlu_eval.py b/modellink/tasks/evaluation/eval_impl/mmlu_eval.py similarity index 95% rename from tasks/evaluation/eval_impl/mmlu_eval.py rename to modellink/tasks/evaluation/eval_impl/mmlu_eval.py index a4d00cb9bcb..aa2d85ab5ab 100644 --- a/tasks/evaluation/eval_impl/mmlu_eval.py +++ b/modellink/tasks/evaluation/eval_impl/mmlu_eval.py @@ -20,10 +20,10 @@ import json import pandas as pd import tqdm -from modellink.error_utils import check_divisible_by_zero -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import MMLU_TEMPLATE_DIR +from .template import MMLU_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero logger = logging.getLogger(__name__) diff --git a/modellink/tasks/evaluation/eval_impl/template.py b/modellink/tasks/evaluation/eval_impl/template.py new file mode 100644 index 00000000000..488fcb017b0 --- /dev/null +++ b/modellink/tasks/evaluation/eval_impl/template.py @@ -0,0 +1,21 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +GSM8K_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json' +MMLU_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json' +CEVAL_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json' +AGIEVAL_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json' +BBH_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson' +CODE_TEST_LOG_DIR = 'modellink/tasks/evaluation/codecheck_log' diff --git a/modellink/tasks/evaluation/utils.py b/modellink/tasks/evaluation/utils.py new file mode 100644 index 00000000000..f2a147659a1 --- /dev/null +++ b/modellink/tasks/evaluation/utils.py @@ -0,0 +1,37 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def add_text_generate_args(parser): + group = parser.add_argument_group(title='text generation') + group.add_argument("--task-data-path", + nargs='*', + default=[], + help='Path to the training dataset. 
Accepted format:' + '1) a single data path, 2) multiple datasets in the' + 'form: dataset1-path dataset2-path ...') + group.add_argument("--temperature", type=float, default=0.5, + help='Sampling temperature.') + group.add_argument("--evaluation-batch-size", type=int, default=1, + help='Size of evaluation batch') + group.add_argument("--greedy", action='store_true', default=False, + help='Use greedy sampling.') + group.add_argument("--top-p", type=float, default=0.9, + help='Top p sampling.') + group.add_argument("--top-k", type=int, default=0, + help='Top k sampling.') + group.add_argument("--max-new-tokens", type=int, default=128, + help='Size of the output generated text.') + group.add_argument("--task", nargs='*', default=[], help='Choose one task from mmlu, boolq and gsm8k') + return parser diff --git a/tasks/__init__.py b/modellink/tasks/finetune/__init__.py similarity index 100% rename from tasks/__init__.py rename to modellink/tasks/finetune/__init__.py diff --git a/tasks/evaluation/__init__.py b/modellink/tasks/finetune/lora/__init__.py similarity index 100% rename from tasks/evaluation/__init__.py rename to modellink/tasks/finetune/lora/__init__.py diff --git a/tasks/finetune/lora/utils.py b/modellink/tasks/finetune/lora/utils.py similarity index 100% rename from tasks/finetune/lora/utils.py rename to modellink/tasks/finetune/lora/utils.py diff --git a/modellink/tasks/inference/__init__.py b/modellink/tasks/inference/__init__.py new file mode 100644 index 00000000000..9a1307aeca2 --- /dev/null +++ b/modellink/tasks/inference/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
\ No newline at end of file diff --git a/tasks/inference/text_generation/__init__.py b/modellink/tasks/inference/text_generation/__init__.py similarity index 100% rename from tasks/inference/text_generation/__init__.py rename to modellink/tasks/inference/text_generation/__init__.py diff --git a/tasks/inference/text_generation/beam_utils.py b/modellink/tasks/inference/text_generation/beam_utils.py similarity index 100% rename from tasks/inference/text_generation/beam_utils.py rename to modellink/tasks/inference/text_generation/beam_utils.py diff --git a/tasks/inference/text_generation/communication.py b/modellink/tasks/inference/text_generation/communication.py similarity index 99% rename from tasks/inference/text_generation/communication.py rename to modellink/tasks/inference/text_generation/communication.py index f94a5a0ef8c..382d8d7d551 100644 --- a/tasks/inference/text_generation/communication.py +++ b/modellink/tasks/inference/text_generation/communication.py @@ -19,7 +19,7 @@ import torch from megatron.core import parallel_state -from modellink.error_utils import ensure_var_is_not_none +from ....error_utils import ensure_var_is_not_none def recv_from_prev_pipeline_rank_(recv_buffer=None): diff --git a/tasks/inference/text_generation/forward_step.py b/modellink/tasks/inference/text_generation/forward_step.py similarity index 98% rename from tasks/inference/text_generation/forward_step.py rename to modellink/tasks/inference/text_generation/forward_step.py index 923eb4e61aa..eb8d0c5df6c 100644 --- a/tasks/inference/text_generation/forward_step.py +++ b/modellink/tasks/inference/text_generation/forward_step.py @@ -21,8 +21,9 @@ import torch from megatron import get_args from megatron.core import parallel_state -from modellink.error_utils import check_equal -from tasks.inference.text_generation.utils import forward_step as _forward_step_helper + +from .utils import forward_step as _forward_step_helper +from ....error_utils import check_equal class InferenceParams: diff --git a/tasks/inference/text_generation/generation.py b/modellink/tasks/inference/text_generation/generation.py similarity index 99% rename from tasks/inference/text_generation/generation.py rename to modellink/tasks/inference/text_generation/generation.py index e4e4971613b..752022c8158 100644 --- a/tasks/inference/text_generation/generation.py +++ b/modellink/tasks/inference/text_generation/generation.py @@ -20,8 +20,8 @@ import torch.nn.functional as F from megatron import get_args from megatron.core import parallel_state -from tasks.inference.text_generation.utils import pad_batch, top_k_logits +from .utils import pad_batch, top_k_logits from .forward_step import ForwardStep from .beam_utils import BeamHypotheses from .communication import broadcast_from_last_pipeline_stage diff --git a/tasks/inference/text_generation/infer_base.py b/modellink/tasks/inference/text_generation/infer_base.py similarity index 98% rename from tasks/inference/text_generation/infer_base.py rename to modellink/tasks/inference/text_generation/infer_base.py index b6e754a2700..be1354f3b45 100644 --- a/tasks/inference/text_generation/infer_base.py +++ b/modellink/tasks/inference/text_generation/infer_base.py @@ -26,10 +26,6 @@ logging.basicConfig(format="") logging.getLogger().setLevel(logging.INFO) - - - - def add_text_generate_args(parser): group = parser.add_argument_group(title='text generation') group.add_argument("--task", @@ -40,6 +36,8 @@ def add_text_generate_args(parser): group.add_argument("--temperature", type=float, default=0.7, 
                        help='Sampling temperature.')
     group.add_argument("--max-length", type=int, default=256,
                        help='Total length of text.')
     group.add_argument("--max-new-tokens", type=int, default=128,
                        help='Size of the output generated text.')
+    group.add_argument('--inference-prompt-type', type=str, default='llama',
+                       help="choose the prompt type for inference")
     return parser
diff --git a/tasks/inference/text_generation/module.py b/modellink/tasks/inference/text_generation/module.py
similarity index 98%
rename from tasks/inference/text_generation/module.py
rename to modellink/tasks/inference/text_generation/module.py
index c601fdfd46d..10755bce967 100644
--- a/tasks/inference/text_generation/module.py
+++ b/modellink/tasks/inference/text_generation/module.py
@@ -166,9 +166,9 @@ class MegatronModuleForCausalLM(MegatronModuleForCausalLMABC):
     def __init__(self, *args, **kwargs):
         super(MegatronModuleForCausalLM, self).__init__()
         from megatron import get_tokenizer
-        from tasks.inference.text_generation import greedy_search_or_sampling
-        from tasks.inference.text_generation import beam_search
-        from tasks.inference.text_generation.communication import broadcast_float_list
+        from .utils import greedy_search_or_sampling
+        from .generation import beam_search
+        from .communication import broadcast_float_list
 
         args = get_args()
         args.max_tokens_to_oom = args.max_tokens_to_oom if hasattr(args, "max_tokens_to_oom") else 4096
diff --git a/tasks/inference/text_generation/utils.py b/modellink/tasks/inference/text_generation/utils.py
similarity index 99%
rename from tasks/inference/text_generation/utils.py
rename to modellink/tasks/inference/text_generation/utils.py
index e716dbac44a..d3f780ee86b 100644
--- a/tasks/inference/text_generation/utils.py
+++ b/modellink/tasks/inference/text_generation/utils.py
@@ -21,19 +21,19 @@
 import torch
 import torch.nn.functional as F
 from torch.nn.parallel.distributed import DistributedDataParallel as torchDDP
+
 from megatron import get_args
 from megatron import get_tokenizer
 from megatron.core import parallel_state
-
 from megatron.utils import get_ltor_masks_and_position_ids, unwrap_model
 from megatron.core.pipeline_parallel.p2p_communication import recv_forward, send_forward
-
 from megatron.core.distributed import DistributedDataParallel as LocalDDP
 from megatron.model import Float16Module
 from megatron.core.utils import get_model_config
-from tasks.inference.text_generation.communication import broadcast_tensor
-from tasks.finetune.lora.utils import is_enable_lora, get_lora_model_classes
+
+from .communication import broadcast_tensor
+from ...finetune.lora.utils import is_enable_lora, get_lora_model_classes
 
 
 def get_batch(context_tokens):
diff --git a/modellink/training.py b/modellink/training.py
index 4e393c72c5e..b4eaf311e42 100644
--- a/modellink/training.py
+++ b/modellink/training.py
@@ -19,7 +19,7 @@ from functools import wraps
 from peft import LoraConfig, get_peft_model
 from megatron.arguments import core_transformer_config_from_args
 from megatron import get_args
-from tasks.finetune.lora.utils import is_enable_lora
+from .tasks.finetune.lora.utils import is_enable_lora
 
 
 def get_model_wrapper(fn):
diff --git a/tasks/finetune/__init__.py b/tasks/finetune/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tasks/finetune/lora/__init__.py b/tasks/finetune/lora/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tasks/inference/__init__.py b/tasks/inference/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tasks/inference/inference_llama.py b/tasks/inference/inference_llama.py
deleted file mode 100644
index a051a877034..00000000000
--- a/tasks/inference/inference_llama.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# coding=utf-8
-# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ascendspeed import megatron_adaptor
-
-import modellink
-from megatron import get_args
-from megatron.model import GPTModel
-from megatron.initialize import initialize_megatron
-from megatron.arguments import core_transformer_config_from_args
-from tasks.inference.text_generation.infer_base import task_factory, add_text_generate_args
-
-
-def model_provider(pre_process=True, post_process=True):
-    """Build the model."""
-    config = core_transformer_config_from_args(get_args())
-
-    init_model = GPTModel(
-        config,
-        parallel_output=False,
-        pre_process=pre_process,
-        post_process=post_process
-    )
-    return init_model
-
-
-if __name__ == "__main__":
-    initialize_megatron(extra_args_provider=add_text_generate_args,
-                        args_defaults={'no_load_rng': True,
-                                       'no_load_optim': True})
-
-    args = get_args()
-
-    model = GPTModel.from_pretrained(
-        model_provider=model_provider,
-        pretrained_model_name_or_path=args.load
-    )
-
-    task_factory(args, model)
diff --git a/tasks/inference/inference_mixtral.py b/tasks/inference/inference_mixtral.py
deleted file mode 100644
index 7ca311b4d8e..00000000000
--- a/tasks/inference/inference_mixtral.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# coding=utf-8
-# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ascendspeed import megatron_adaptor
-
-import modellink
-from megatron import get_args
-from megatron.model import GPTModel
-from megatron.initialize import initialize_megatron
-from megatron.arguments import core_transformer_config_from_args
-from tasks.inference.text_generation.infer_base import task_factory, add_text_generate_args
-
-
-def model_provider(pre_process=True, post_process=True):
-    """Build the model."""
-    config = core_transformer_config_from_args(get_args())
-    init_model = GPTModel(
-        config,
-        parallel_output=False,
-        pre_process=pre_process,
-        post_process=post_process
-    )
-    return init_model
-
-
-if __name__ == "__main__":
-    initialize_megatron(extra_args_provider=add_text_generate_args,
-                        args_defaults={'no_load_rng': True,
-                                       'no_load_optim': True})
-
-    args = get_args()
-
-    model = GPTModel.from_pretrained(
-        model_provider=model_provider,
-        pretrained_model_name_or_path=args.load
-    )
-
-    system_template = ""
-    dialog_template = "[INST] {instruction} [/INST] "
-    template = system_template + dialog_template
-
-    task_factory(args, model, system_template=system_template, dialog_template=dialog_template)
diff --git a/tests/pipeline/baichuan-13B/test_convert_weight_from_huggingface.py b/tests/pipeline/baichuan-13B/test_convert_weight_from_huggingface.py
index d0dacfdf08b..428789200c8 100644
--- a/tests/pipeline/baichuan-13B/test_convert_weight_from_huggingface.py
+++ b/tests/pipeline/baichuan-13B/test_convert_weight_from_huggingface.py
@@ -31,7 +31,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/util.py")
+        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(sys.argv[12], "iter_0000001")
diff --git a/tests/pipeline/baichuan2-13B/test_convert_weight_from_huggingface.py b/tests/pipeline/baichuan2-13B/test_convert_weight_from_huggingface.py
index 76a37964f20..38e3f400906 100644
--- a/tests/pipeline/baichuan2-13B/test_convert_weight_from_huggingface.py
+++ b/tests/pipeline/baichuan2-13B/test_convert_weight_from_huggingface.py
@@ -31,7 +31,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
         """
         base_dir = Path(__file__).absolute().parent.parent.parent.parent
-        file_path = os.path.join(base_dir, "tools/checkpoint/util.py")
+        file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py")
         arguments = sys.argv[1:]
         subprocess.run(["python", file_path] + arguments)
         output_dir = os.path.join(sys.argv[12], "iter_0000001")
diff --git a/tests/pipeline/bloom-7B/test_convert_ckpt_from_huggingface.py b/tests/pipeline/bloom-7B/test_convert_ckpt_from_huggingface.py
index 04b4105c5e4..16bba71414d 100644
--- a/tests/pipeline/bloom-7B/test_convert_ckpt_from_huggingface.py
+++ b/tests/pipeline/bloom-7B/test_convert_ckpt_from_huggingface.py
@@ -23,7 +23,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase):
         to compose the unique transformer layer and all these layer stack to compose the entity of the model.
""" base_dir = Path(__file__).absolute().parent.parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(sys.argv[10], "iter_0000001") diff --git a/tests/pipeline/intern-7B/test_convert_ckpt_from_huggingface.py b/tests/pipeline/intern-7B/test_convert_ckpt_from_huggingface.py index 2cd8e4d5d59..0090794a1fd 100644 --- a/tests/pipeline/intern-7B/test_convert_ckpt_from_huggingface.py +++ b/tests/pipeline/intern-7B/test_convert_ckpt_from_huggingface.py @@ -32,7 +32,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. """ base_dir = Path(__file__).absolute().parent.parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(self.config.convert_ckpt_param[11], "iter_0000001") diff --git a/tests/pipeline/llama2-7B/test_convert_ckpt_from_huggingface.py b/tests/pipeline/llama2-7B/test_convert_ckpt_from_huggingface.py index 0c456b3dc9f..b6730f3cd04 100644 --- a/tests/pipeline/llama2-7B/test_convert_ckpt_from_huggingface.py +++ b/tests/pipeline/llama2-7B/test_convert_ckpt_from_huggingface.py @@ -32,7 +32,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. """ base_dir = Path(__file__).absolute().parent.parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(self.config.convert_ckpt_param[11], "iter_0000001") diff --git a/tests/ut/test_convert_ckpt_from_huggingface.py b/tests/ut/test_convert_ckpt_from_huggingface.py index edc03bd8730..58459d3f318 100644 --- a/tests/ut/test_convert_ckpt_from_huggingface.py +++ b/tests/ut/test_convert_ckpt_from_huggingface.py @@ -41,7 +41,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. """ base_dir = Path(__file__).absolute().parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(sys.argv[12], "iter_0000001") diff --git a/tools/checkpoint/util.py b/tools/checkpoint/convert_ckpt.py similarity index 100% rename from tools/checkpoint/util.py rename to tools/checkpoint/convert_ckpt.py diff --git a/tools/checkpoint/convert_util.sh b/tools/checkpoint/convert_util.sh deleted file mode 100644 index edcaaeb7087..00000000000 --- a/tools/checkpoint/convert_util.sh +++ /dev/null @@ -1,12 +0,0 @@ -CONVERT_ARGS=" - --no-checking \ - --model-type GPT \ - --loader loader_llama2_hf \ - --saver saver_megatron \ - --target-tensor-parallel-size 4 \ - --load-dir ./ckpt \ - --save-dir ./save_dir \ - --tokenizer-model None -" -source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py $CONVERT_ARGS \ No newline at end of file -- Gitee