diff --git a/README.md b/README.md
index 868df24bbd305a85e32340dc371382a2b73ca13c..b19885c07fa812ffeae0e0f8cc84f7d7000756da 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,6 @@

- - Docs - GitHub @@ -56,61 +53,61 @@ ModelLink旨在为华为 [昇腾芯片](https://open.codehub.huawei.com/OpenBaiz Aquila 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json Baichuan 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 13B - lora - 对话 - 评估 + -- + 对话 + 评估 alpaca_data.json Baichuan2 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 13B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json Bloom 7B1 -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 176B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json InternLM 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json @@ -123,89 +120,89 @@ ModelLink旨在为华为 [昇腾芯片](https://open.codehub.huawei.com/OpenBaiz LLaMA 7B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 13B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 33B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 65B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json LLaMA2 7B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 13B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 34B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json 70B - lora - 对话 - 评估 + lora + 对话 + 评估 alpaca_data.json Qwen 7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 14B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json 72B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json Mixtral 8x7B -- - 对话 - 评估 + 对话 + 评估 alpaca_data.json @@ -630,7 +627,7 @@ ModelLink: 1. 如果你尝试使用 huggingface 的模型权重,请首先进行权重转换, 以 Llama-7B 为例: - PTD 策略的转换 ```bash - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -645,11 +642,11 @@ ModelLink: - 仅仅使用 PTD 策略训练的模型:在这种模式下,模型以 Megatron-LM 的风格被 流水并行 和 张量并行 切分 ```bash - sh tasks/inference/generate_llama_7b_ptd.sh + sh examples/llama/generate_llama_7b_ptd.sh ``` - 如果你仅仅使用 Lora, 可以参考: ```bash - sh tasks/inference/generate_llama_7b_lora_ptd.sh + sh examples/llama/generate_llama_7b_lora_ptd.sh ``` #### 使用手册 @@ -836,7 +833,7 @@ VOCAB_FILE=../models/llama7b-hf/ DATA_PATH="dataset/boolq/test" TASK="boolq" # 配置生成参数 -python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 512 \ @@ -857,13 +854,13 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ --micro-batch-size 1 \ --seed 42 | tee logs/train.log # 开启评估 -bash tasks/evaluation/evaluate_llama_7B_ptd.sh +bash examples/llama/evaluate_llama_7B_ptd.sh ``` 最重要的评估参数是 `--max-new-tokens`, 它表示模型输出的生成长度,比如,多项选择问题的输出长度就会明显比编码任务的输出长度小,该参数也很大程度上影响了模型的评估性能。通过--evaluation-batch-size参数可以设置多batch推理,提升模型评估性能。 ```bash -python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 512 \ @@ -899,7 +896,7 @@ HumanEval 是一个用于挑战代码生成问题的数据集,具有164个编 ##### BoolQ BoolQ 是一个 yes/no 的问答数据集, 每一个问题包含了一个(问题,文章,答案)三元组,同时有文章的标题作为额外的选择性输入。BoolQ 数据集的评估相对简单,只需要配置 `TASK="boolq"`, `--max-new-token=1`。 -零样本评估的结果通常会被给定的 prompt 影响,可以尝试通过在 `tasks/evaluation/evaluation.py` 中设置合适的 prompt 得到更高的分数, +零样本评估的结果通常会被给定的 prompt 影响,可以尝试通过在 `evaluation.py` 中设置合适的 prompt 得到更高的分数, ```bash # 通过修改 template 更新prompt @@ -920,44 +917,42 @@ GSM8K 是一个有8.5k高质量小学数学应用题文本的数据集,每一 在很多网站,MMLU 的精度会依据学科进行评估,57个学科主要属于四个大类, 因此该数据集也可以基于四个大类进行打分,[网站](https://github.com/hendrycks/test/blob/master/categories.py) 给出了具体的57个类别。 ### 使用加速特性 -ModelLink支持张量并行、流水线并行、序列并行、重计算、分布式优化器、等多种加速算法,下表为各种加速特性对应的使能开关: +ModelLink支持张量并行、流水线并行、序列并行、重计算、分布式优化器等多种加速算法和融合算子,下表为各种加速特性对应的使能开关: + +| 加速特性 | 使能参数 | 
+|:------------------------------------:|:------------------------------:|
+| 张量并行 | --tensor-model-parallel-size |
+| 流水线并行 | --pipeline-model-parallel-size |
+| 流水并行动态划分 | --num-layer-list |
+| 序列并行 | --sequence-parallel |
+| 重计算 | --recompute-granularity |
+| 分布式优化器 | --use-distributed-optimizer |
+| DDP allreduce 掩盖 | --overlap-grad-reduce |
+| Flash attention | --use-flash-attn |
+| Fused rmsnorm | --use-fused-rmsnorm |
+| Fused swiglu | --swiglu |
+| Fused rotary
position embedding | --use-fused-rotary-pos-emb | -| 加速特性 | 使能参数 | -|:---------:|:--------:| -| 张量并行 | --tensor-model-parallel-size | -| 流水线并行 | --pipeline-model-parallel-size | -| 序列并行 | --sequence-parallel | -| 重计算 | --recompute-granularity | -| 分布式优化器 | --use-distributed-optimizer | ```bash torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ --tensor-model-parallel-size ${TP} \ --pipeline-model-parallel-size ${PP} \ + --num-layer-list 1,2,2,2,1 \ --sequence-parallel \ --recompute-granularity full \ --recompute-method block \ --recompute-num-layers 72 \ --use-distributed-optimizer \ - ... \ - ... -``` -在模型训练和下游任务中,可以使用--tensor-model-parallel-size和--pipeline-model-parallel-size将模型切分到不同的设备上,降低单设备内存占用。 -使用--sequence-parallel对数据进行切分,使得模型可以训练超长序列数据。recompute-granularity或者use-distributed-optimizer也能降低单设备内存占用。 - -ModelLink支持attn、rmsnorm、swiglu、rotary-pos-emb、mlp等多个融合算子,提升模型训练性能: -```bash -torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ --use-flash-attn \ --use-fused-rmsnorm \ - --use-fused-swiglu \ + --swiglu \ + --overlap-grad-reduce \ --use-fused-rotary-pos-emb \ - --mlp-layer-fusion \ ... \ ... ``` - ## 致谢 --- diff --git a/README_en.md b/README_en.md index a0b6b85c38d27cb6b93ae83eb59d8badf3fe173c..b88c6b95c5680fb916a6dff9e9f01839462db33c 100644 --- a/README_en.md +++ b/README_en.md @@ -1,9 +1,6 @@

- - Docs - GitHub @@ -56,61 +53,61 @@ Current ModelLink supports pre-training and fine-tuning for the following models Aquila 7B -- - inference - evaluation + inference + evaluation alpaca_data.json Baichuan 7B -- - inference - evaluation + inference + evaluation alpaca_data.json 13B - lora - inference - evaluation + -- + inference + evaluation alpaca_data.json Baichuan2 7B -- - inference - evaluation + inference + evaluation alpaca_data.json 13B -- - inference - evaluation + inference + evaluation alpaca_data.json Bloom 7B1 -- - inference - evaluation + inference + evaluation alpaca_data.json 176B -- - inference - evaluation + inference + evaluation alpaca_data.json InternLM 7B -- - inference - evaluation + inference + evaluation alpaca_data.json @@ -123,89 +120,89 @@ Current ModelLink supports pre-training and fine-tuning for the following models LLaMA 7B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 13B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 33B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 65B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json LLaMA2 7B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 13B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 34B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json 70B - lora - inference - evaluation + lora + inference + evaluation alpaca_data.json Qwen 7B -- - inference - evaluation + inference + evaluation alpaca_data.json 14B -- - inference - evaluation + inference + evaluation alpaca_data.json 72B -- - inference - evaluation + inference + evaluation alpaca_data.json Mixtral 8x7B -- - inference - evaluation + inference + evaluation alpaca_data.json @@ -644,7 +641,7 @@ Currently, we support the following four cases of inference: - PTD only ```bash - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -662,7 +659,7 @@ Currently, we support the following four cases of inference: ``` - **If you want to use lora model**, for details, refer to: ```bash - sh tasks/inference/generate_llama_7b_lora_ptd.sh + sh examples/llama/generate_llama_7b_lora_ptd.sh ``` #### Usage Guide @@ -848,7 +845,7 @@ VOCAB_FILE=../models/llama7b-hf/ DATA_PATH="dataset/boolq/test" TASK="boolq" # configure generation parameters -python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 512 \ @@ -868,7 +865,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ --micro-batch-size 1 \ --seed 42 | tee logs/train.log # start evaluation -bash tasks/evaluation/evaluate_llama_7B_ptd.sh +bash examples/llama/evaluate_llama_7B_ptd.sh ``` #### Task Introduction @@ -876,7 +873,7 @@ The most important evaluation parameters must be `--max-new-tokens`, which means questions' output length is obviously shorter than coding tasks. Besides, this parameter largely decides the speed of model generation. 
```bash -python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 512 \ @@ -929,13 +926,19 @@ you want to evaluate on test data, you should email your results to [C-Eval](htt ### Acceleration Features ModelLink supports various acceleration algorithms such as tensor parallelism, pipeline parallelism, sequence parallelism, recomputation, distributed optimizer, and more. The table below shows the enable switches corresponding to each acceleration feature: -| Acceleration Feature | Enable Parameter | -|:---------:|:--------:| -| Tensor Parallel | --tensor-model-parallel-size | -| Pipeline Parallel | --pipeline-model-parallel-size | -| Sequence Parallel | --sequence-parallel | -| Recomputation | --recompute-granularity | -| Distributed Optimizer | --use-distributed-optimizer | +| Acceleration Feature | Enable Parameter | +|:------------------------------------:|:------------------------------:| +| Tensor Parallel | --tensor-model-parallel-size | +| Pipeline Parallel | --pipeline-model-parallel-size | +| Dynamic division for PP | --num-layer-list | +| Sequence Parallel | --sequence-parallel | +| Recomputation | --recompute-granularity | +| Distributed Optimizer | --use-distributed-optimizer | +| overlap DDP allreduce | --overlap-grad-reduce | +| Flash attention | --use-flash-attn | +| Fused rmsnorm | --use-fused-rmsnorm | +| Fused swiglu | --swiglu | +| Fused rotary
position embedding | --use-fused-rotary-pos-emb | ```bash torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ @@ -946,25 +949,15 @@ torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ --recompute-method block \ --recompute-num-layers 72 \ --use-distributed-optimizer \ - ... \ - ... -``` -In model training and downstream tasks, you can use --tensor-model-parallel-size and --pipeline-model-parallel-size to distribute the model across different devices, reducing memory usage per device. -Using --sequence-parallel to split the data enables the model to train on extremely long sequences. The options recompute-granularity or use-distributed-optimizer can also reduce memory usage per device. - -ModelLink supports multiple fused operators such as attn, rmsnorm, swiglu, rotary-pos-emb, mlp, etc., to improve model training performance: -```bash -torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ --use-flash-attn \ --use-fused-rmsnorm \ - --use-fused-swiglu \ + --swiglu \ --use-fused-rotary-pos-emb \ - --mlp-layer-fusion \ + --overlap-grad-reduce \ ... \ ... ``` - ## Acknowledgments --- diff --git a/tasks/evaluation/evaluation_llama.py b/evaluation.py similarity index 81% rename from tasks/evaluation/evaluation_llama.py rename to evaluation.py index 95fb2f1b880fdb3eaeffc2190578ecb9a65c3a05..352767c076db5ff68474dd467119054be4ff78c0 100644 --- a/tasks/evaluation/evaluation_llama.py +++ b/evaluation.py @@ -26,14 +26,16 @@ from megatron.initialize import initialize_megatron from megatron import get_args from megatron.model import GPTModel from megatron.arguments import core_transformer_config_from_args -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.boolq_eval import BoolqEval -from tasks.evaluation.eval_impl.gsm8k_eval import Gsm8kEval -from tasks.evaluation.eval_impl.mmlu_eval import MmluEval -from tasks.evaluation.eval_impl.ceval_exam import CEvalExam -from tasks.evaluation.eval_impl.bbh_eval import BBHEval -from tasks.evaluation.eval_impl.agi_eval import AGIEvalExam -from tasks.evaluation.eval_impl.human_eval import HumanEval + +from modellink.tasks.evaluation.utils import add_text_generate_args +from modellink.tasks.evaluation.eval_api.chat import Chat +from modellink.tasks.evaluation.eval_impl.boolq_eval import BoolqEval +from modellink.tasks.evaluation.eval_impl.gsm8k_eval import Gsm8kEval +from modellink.tasks.evaluation.eval_impl.mmlu_eval import MmluEval +from modellink.tasks.evaluation.eval_impl.ceval_exam import CEvalExam +from modellink.tasks.evaluation.eval_impl.bbh_eval import BBHEval +from modellink.tasks.evaluation.eval_impl.agi_eval import AGIEvalExam +from modellink.tasks.evaluation.eval_impl.human_eval import HumanEval sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) logging.getLogger().setLevel(logging.INFO) @@ -53,30 +55,6 @@ def model_provider(pre_process=True, post_process=True): return init_model -def add_text_generate_args(parser): - group = parser.add_argument_group(title='text generation') - group.add_argument("--task-data-path", - nargs='*', - default=[], - help='Path to the training dataset. 
Accepted format:' - '1) a single data path, 2) multiple datasets in the' - 'form: dataset1-path dataset2-path ...') - group.add_argument("--temperature", type=float, default=0.5, - help='Sampling temperature.') - group.add_argument("--evaluation-batch-size", type=int, default=1, - help='Size of evaluation batch') - group.add_argument("--greedy", action='store_true', default=False, - help='Use greedy sampling.') - group.add_argument("--top-p", type=float, default=0.9, - help='Top p sampling.') - group.add_argument("--top-k", type=int, default=0, - help='Top k sampling.') - group.add_argument("--max-new-tokens", type=int, default=128, - help='Size of the output generated text.') - group.add_argument("--task", nargs='*', default=[], help='Choose one task from mmlu, boolq and gsm8k') - return parser - - def get_result(result): if result: final_results = [] diff --git a/examples/aquila/README.md b/examples/aquila/README.md index 9875bafa03bcc74ff5daa533b9a7528b1572dd0c..dfcbd2bb3d373da73dfab986dc058d178874b900 100644 --- a/examples/aquila/README.md +++ b/examples/aquila/README.md @@ -98,7 +98,7 @@ cd ModelLink/ mkdir model_weights # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --load-dir ./HF_Aquila7B_downloaded \ --save-dir ./model_weights/aquila \ @@ -114,7 +114,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -172,7 +172,7 @@ Aquila-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: 推理与预训练不同,我们必须加载预训练权重,请注意:在转换权重时使用的模型结构参数,和运行评估任务时使用的模型结构参数,应保持一致。 -权重转换完成后,我们配置Aquila-7B推理脚本`tasks/inference/generate_aquila_7b_ptd.sh`,需要正确指定加载权重的路径,词表路径等(下面样例仅供参考) +权重转换完成后,我们配置Aquila-7B推理脚本`example/aquila/generate_aquila_7b_ptd.sh`,需要正确指定加载权重的路径,词表路径等(下面样例仅供参考) ```shell # 请按实际情况修改模型权重路径和分词器路径 @@ -182,7 +182,7 @@ TOKENIZER_PATH="./HF_Aquila7B_downloaded/" 启动Aquila-7B推理: ```shell -bash ./tasks/inference/generate_aquila_7b_ptd.sh +bash examples/aquila/generate_aquila_7b_ptd.sh ``` 部分推理样本如下: @@ -198,7 +198,7 @@ Aquila-7B: 评估与推理类似,也必须加载转换后的权重,请注意:在转换权重时使用的模型结构参数,和运行评估任务时使用的模型结构参数,应保持一致。 -权重转换完成后,我们配置Aquila-7B评估脚本 `tasks/evaluation/evaluate_aquila_7b_ptd.sh`,需要正确指定加载权重的路径,词表路径,评估数据的路径,以及评估任务的名字等(下面样例仅供参考): +权重转换完成后,我们配置Aquila-7B评估脚本 `examples/aquila/evaluate_aquila_7b_ptd.sh`,需要正确指定加载权重的路径,词表路径,评估数据的路径,以及评估任务的名字等(下面样例仅供参考): ```shell CKPT_LOAD_DIR="./model_weights/aquila/" @@ -209,7 +209,7 @@ TASK="boolq" 启动Aquila-7B评估 ```shell -bash tasks/evaluation/evaluate_aquila_7b_ptd.sh +bash examples/aquila/evaluate_aquila_7b_ptd.sh ``` Aquila-7B在**Ascend NPU**中的评测表现: diff --git a/examples/aquila/README_en.md b/examples/aquila/README_en.md index 3b1b5479f0fb65e3442efe4e69c1c57ae9cd22cc..3afc181527c672760a2d95f912beddca80cda6a5 100644 --- a/examples/aquila/README_en.md +++ b/examples/aquila/README_en.md @@ -97,7 +97,7 @@ mkdir model_weights # please modify the path to set_env.sh based on your environment. 
source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --load-dir ./HF_Aquila7B_downloaded \ --save-dir ./model_weights/aquila \ @@ -113,7 +113,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -166,7 +166,7 @@ The performance of Aquila-7B in Ascend NPU and reference device: We support AscendSpeed Inference for text generation with Aquila 7B model. -Inference is different from pre-training because it requires loading the pre-trained model weights. Therefore, we need to complete the aforementioned model weight conversion task first, then configure the Aquila-7B Inference shell script `tasks/inference/generate_aquila_7b_ptd.sh`. "CKPT_LOAD_DIR" must point to the converted weights directory, and "TOKENIZER_PATH" must point to the directory which contains Aquila vocabulary files -- in our example, it is "./HF_Aquila7B_downloaded". In your operation, please fill in correct value based on your actual scenario. +Inference is different from pre-training because it requires loading the pre-trained model weights. Therefore, we need to complete the aforementioned model weight conversion task first, then configure the Aquila-7B Inference shell script `examples/aquila/generate_aquila_7b_ptd.sh`. "CKPT_LOAD_DIR" must point to the converted weights directory, and "TOKENIZER_PATH" must point to the directory which contains Aquila vocabulary files -- in our example, it is "./HF_Aquila7B_downloaded". In your operation, please fill in correct value based on your actual scenario. ```shell # please change to actual values @@ -176,7 +176,7 @@ TOKENIZER_PATH="./HF_Aquila7B_downloaded/" Start Aquila-7B Inference: ```shell -bash ./tasks/inference/generate_aquila_7b_ptd.sh +bash ./examples/aquila/generate_aquila_7b_ptd.sh ``` Sample results of Aquila-7B Inference: @@ -190,7 +190,7 @@ We use BoolQ benchmark to evaluate our model. You can [go to the BoolQ Benchmark Evaluation task is similar to inference task too,it also requires loading the pre-trained model weights. Please note that the model structure parameters used in converting weights should be consistent with those used in running the evaluation task. -After weight conversion is complete, we configure the Aquila-7B evaluation script `tasks/evaluation/evaluate_aquila_7b_ptd.sh`. We need to correctly specify the path to load weights, the path to tokenizer and vocab, and so on (the following example is for reference only) +After weight conversion is complete, we configure the Aquila-7B evaluation script `examples/aquila/evaluate_aquila_7b_ptd.sh`. 
We need to correctly specify the path to load weights, the path to tokenizer and vocab, and so on (the following example is for reference only) ```shell CKPT_LOAD_DIR="./model_weights/aquila/" @@ -201,7 +201,7 @@ After weight conversion is complete, we configure the Aquila-7B evaluation scrip Start evaluation task ```shell -bash ./tasks/evaluation/evaluate_aquila_7b_ptd.sh +bash ./examples/aquila/evaluate_aquila_7b_ptd.sh ``` Sample Aquila-7B performance running in **Ascend NPU**: diff --git a/tasks/evaluation/evaluate_aquila_7b_ptd.sh b/examples/aquila/evaluate_aquila_7b_ptd.sh similarity index 92% rename from tasks/evaluation/evaluate_aquila_7b_ptd.sh rename to examples/aquila/evaluate_aquila_7b_ptd.sh index d4ae5d89121da67378320d99de44cd248d340e8c..e46c9b87a23050cf6eaaea7e24905a76e5eed19e 100644 --- a/tasks/evaluation/evaluate_aquila_7b_ptd.sh +++ b/examples/aquila/evaluate_aquila_7b_ptd.sh @@ -26,7 +26,7 @@ DISTRIBUTED_ARGS=" " # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $EVAL_DATA_PATH \ --task $TASK\ --tensor-model-parallel-size ${TP} \ diff --git a/tasks/inference/generate_aquila_7b_ptd.sh b/examples/aquila/generate_aquila_7b_ptd.sh similarity index 95% rename from tasks/inference/generate_aquila_7b_ptd.sh rename to examples/aquila/generate_aquila_7b_ptd.sh index 906eb2c22fd338e8f6fd59fd194f3ffa2ecd56ce..cd17e500296db83b39927efcccf55617225e4722 100644 --- a/tasks/inference/generate_aquila_7b_ptd.sh +++ b/examples/aquila/generate_aquila_7b_ptd.sh @@ -23,7 +23,7 @@ DISTRIBUTED_ARGS=" --master_port $MASTER_PORT " -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/inference/inference.py \ --tensor-model-parallel-size ${TP} \ --pipeline-model-parallel-size ${PP} \ --num-layers 32 \ diff --git a/examples/baichuan/README.md b/examples/baichuan/README.md index a2e58067fe69679185a9aa447a3cf0923cf1fb13..be0f5272a7d9164c73c0345c20adcc1efc73458a 100644 --- a/examples/baichuan/README.md +++ b/examples/baichuan/README.md @@ -22,7 +22,6 @@ - [脚本](#脚本) - [性能](#性能) - [吞吐](#吞吐) - - [Lora微调](#Lora微调) - [推理](#推理) - [评估](#评估) @@ -103,7 +102,7 @@ mkdir baichuan-7B-mt # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -120,7 +119,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -188,7 +187,7 @@ Baichuan-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -首先需要配置baichuan-7B的推理脚本: tasks/inference/generate_baichuan_7b_ptd.sh +首先需要配置baichuan-7B的推理脚本: examples/baichuan/generate_baichuan_7b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -202,7 +201,7 @@ TOKENIZER_PATH="your tokenizer directory path" 然后可直接启动generate_baichuan_7b_ptd.sh ```bash -bash tasks/inference/generate_baichuan_7b_ptd.sh +bash examples/baichuan/generate_baichuan_7b_ptd.sh ``` 推理的示例如下: @@ -224,7 +223,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan_7B_ptd.sh +bash 
./examples/baichuan/evaluate_baichuan_7B_ptd.sh ``` @@ -337,7 +336,7 @@ mkdir baichuan-13B-mt # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -355,7 +354,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -422,7 +421,7 @@ Baichuan-13B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -配置baichuan-13B的推理脚本: tasks/inference/generate_baichuan_13b_ptd.sh +配置baichuan-13B的推理脚本: examples/baichuan/generate_baichuan_13b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -436,7 +435,7 @@ TOKENIZER_PATH="your tokenizer directory path" 然后可直接启动generate_baichuan_13b_ptd.sh ```bash -bash tasks/inference/generate_baichuan_13b_ptd.sh +bash examples/baichuan/generate_baichuan_13b_ptd.sh ``` 推理的示例如下: @@ -457,7 +456,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan_13B_ptd.sh +bash ./examples/baichuan/evaluate_baichuan_13B_ptd.sh ```
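The Baichuan hunks above move every inference and evaluation entry point from `tasks/inference/` and `tasks/evaluation/` into `examples/baichuan/`, and rename the checkpoint converter from `tools/checkpoint/util.py` to `tools/checkpoint/convert_ckpt.py`. For local wrapper scripts that still reference the old layout, a minimal migration sketch could look like the following; the `my_scripts/` directory is hypothetical, GNU sed is assumed, and only the old-to-new mappings are taken from this patch:

```bash
# Hypothetical helper: rewrite stale references in local wrapper scripts after
# this rename. Only the old -> new path mappings come from the patch itself.
for f in my_scripts/*.sh; do
  sed -i \
    -e 's#tools/checkpoint/util.py#tools/checkpoint/convert_ckpt.py#g' \
    -e 's#tasks/evaluation/evaluation_llama.py#evaluation.py#g' \
    -e 's#tasks/inference/generate_baichuan_7b_ptd.sh#examples/baichuan/generate_baichuan_7b_ptd.sh#g' \
    -e 's#tasks/inference/generate_baichuan_13b_ptd.sh#examples/baichuan/generate_baichuan_13b_ptd.sh#g' \
    -e 's#tasks/evaluation/evaluate_baichuan_7B_ptd.sh#examples/baichuan/evaluate_baichuan_7B_ptd.sh#g' \
    -e 's#tasks/evaluation/evaluate_baichuan_13B_ptd.sh#examples/baichuan/evaluate_baichuan_13B_ptd.sh#g' \
    "$f"
done
```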
diff --git a/examples/baichuan/README_en.md b/examples/baichuan/README_en.md index 85038acc51d69ac8764a06b91e70e08573026e76..2c0eac050d0254a388e4b7e81ab16432f3ab135e 100644 --- a/examples/baichuan/README_en.md +++ b/examples/baichuan/README_en.md @@ -101,7 +101,7 @@ mkdir baichuan-7B-mt # modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -117,7 +117,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -184,7 +184,7 @@ The performance of Baichuan-7B in **Ascend NPU** and **Reference**: ## Inference -Config baichuan-7B inference script: tasks/inference/generate_baichuan_7b_ptd.sh +Config baichuan-7B inference script: examples/baichuan/generate_baichuan_7b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -193,9 +193,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch baichuan-7B inference script: tasks/inference/generate_baichuan_7b_ptd.sh +Launch baichuan-7B inference script: examples/baichuan/generate_baichuan_7b_ptd.sh ```bash -bash tasks/inference/generate_baichuan_7b_ptd.sh +bash examples/baichuan/generate_baichuan_7b_ptd.sh ``` Some inference samples are as follows: @@ -217,7 +217,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan_7B_ptd.sh +bash ./examples/baichuan/evaluate_baichuan_7B_ptd.sh ```
@@ -330,7 +330,7 @@ mkdir baichuan-13B-mt # modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -348,7 +348,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -412,7 +412,7 @@ The performance of the Baichuan-13B in **Ascend NPU** and **Reference**: ## Inference -Config baichuan-13B inference script: tasks/inference/generate_baichuan_13b_ptd.sh +Config baichuan-13B inference script: examples/baichuan/generate_baichuan_13b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -421,9 +421,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch baichuan-13B inference script: tasks/inference/generate_baichuan_13b_ptd.sh +Launch baichuan-13B inference script: examples/baichuan/generate_baichuan_13b_ptd.sh ```bash -bash tasks/inference/generate_baichuan_13b_ptd.sh +bash examples/baichuan/generate_baichuan_13b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/baichuan/baichuan_13B_inference.png) @@ -444,7 +444,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan_13B_ptd.sh +bash ./examples/baichuan/evaluate_baichuan_13B_ptd.sh ```
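As the renamed generate scripts show, Baichuan inference now goes through a top-level `inference.py` rather than `tasks/inference/inference_llama.py`. A trimmed launch sketch, limited to the arguments visible in this patch, follows; `DISTRIBUTED_ARGS` and the remaining model and tokenizer arguments are defined in `examples/baichuan/generate_baichuan_13b_ptd.sh` itself:

```bash
# Illustrative excerpt mirroring examples/baichuan/generate_baichuan_13b_ptd.sh;
# the trailing "..." stands for the model/tokenizer arguments set in that script.
python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \
    --tensor-model-parallel-size 8 \
    --pipeline-model-parallel-size 1 \
    --num-layers 40 \
    ... \
    ...
```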
diff --git a/tasks/evaluation/evaluate_baichuan_13B_ptd.sh b/examples/baichuan/evaluate_baichuan_13B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_baichuan_13B_ptd.sh rename to examples/baichuan/evaluate_baichuan_13B_ptd.sh index e7e9a845dd3c25372b06a632faa5a36923138870..adb46c0083f5060a258e5be3068695450b0e1f4c 100644 --- a/tasks/evaluation/evaluate_baichuan_13B_ptd.sh +++ b/examples/baichuan/evaluate_baichuan_13B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK \ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_baichuan_7B_ptd.sh b/examples/baichuan/evaluate_baichuan_7B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_baichuan_7B_ptd.sh rename to examples/baichuan/evaluate_baichuan_7B_ptd.sh index 3db1076b7126742a049698f49ce494b7239f9eab..f0e11b37c089a008898f05dee4d5ba7ada115d92 100644 --- a/tasks/evaluation/evaluate_baichuan_7B_ptd.sh +++ b/examples/baichuan/evaluate_baichuan_7B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK \ --seq-length 2048 \ diff --git a/tasks/inference/generate_baichuan_13b_ptd.sh b/examples/baichuan/generate_baichuan_13b_ptd.sh similarity index 94% rename from tasks/inference/generate_baichuan_13b_ptd.sh rename to examples/baichuan/generate_baichuan_13b_ptd.sh index f10f08b06dace8cb9f3c29377efb640e38612b26..a54127058369e9519b35c303c9063bf8d7946443 100644 --- a/tasks/inference/generate_baichuan_13b_ptd.sh +++ b/examples/baichuan/generate_baichuan_13b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ diff --git a/tasks/inference/generate_baichuan_7b_ptd.sh b/examples/baichuan/generate_baichuan_7b_ptd.sh similarity index 94% rename from tasks/inference/generate_baichuan_7b_ptd.sh rename to examples/baichuan/generate_baichuan_7b_ptd.sh index d0caad80a00a0ca2f5287cb333d354b256204e7f..8a76d60f34143dd3fb4345370755c6122f2adcd6 100644 --- a/tasks/inference/generate_baichuan_7b_ptd.sh +++ b/examples/baichuan/generate_baichuan_7b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ diff --git a/examples/baichuan2/README.md b/examples/baichuan2/README.md index 
820d8676b7d13d4de2df3fa8e7b1be77e74c14f9..4b4ebb178b7f39f5c386f860eed0ce25dccbaa90 100644 --- a/examples/baichuan2/README.md +++ b/examples/baichuan2/README.md @@ -100,7 +100,7 @@ mkdir baichuan2-7B-mt # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -118,7 +118,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -186,7 +186,7 @@ Baichuan2-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -首先需要配置baichuan2-7B的推理脚本: tasks/inference/generate_baichuan2_7b_ptd.sh +首先需要配置baichuan2-7B的推理脚本: examples/baichuan2/generate_baichuan2_7b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -200,10 +200,11 @@ TOKENIZER_PATH="your tokenizer directory path" 然后可直接启动generate_baichuan2_7b_ptd.sh ```bash -bash tasks/inference/generate_baichuan2_7b_ptd.sh +bash examples/baichuan2/generate_baichuan2_7b_ptd.sh ``` 推理的示例如下: + ![Inference](../../sources/images/baichuan2/baichuan2_7B_inference.png) ## 评估 @@ -220,7 +221,7 @@ TASK="boolq" ``` ```shell -bash ./tasks/evaluation/evaluate_baichuan2_7B_ptd.sh +bash ./examples/baichuan2/evaluate_baichuan2_7B_ptd.sh ```
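The Baichuan2-7B section above uses the renamed `tools/checkpoint/convert_ckpt.py` for both conversion directions. A rough round-trip sketch, restricted to flags that appear in this patch, is given below; the directory names are placeholders, and any additional arguments (tokenizer path, pipeline-parallel size) follow the per-model README:

```bash
# Illustrative round trip; directory names are placeholders.
# HuggingFace weights -> Megatron weights (tensor-parallel slicing for training/inference)
python tools/checkpoint/convert_ckpt.py --model-type GPT \
    --loader llama2_hf \
    --saver megatron \
    --target-tensor-parallel-size 8 \
    --load-dir ./baichuan2-7B-hf \
    --save-dir ./baichuan2-7B-mt

# Megatron weights with any parallel slicing -> HuggingFace weights (for release)
python tools/checkpoint/convert_ckpt.py --model-type GPT \
    --loader megatron \
    --saver megatron \
    --save-model-type save_huggingface_llama \
    --load-dir ./baichuan2-7B-mt \
    --save-dir ./baichuan2-7B-hf
```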
@@ -320,7 +321,7 @@ mkdir baichuan2-13B-mt # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -338,7 +339,7 @@ python tools/checkpoint/util.py \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -404,7 +405,7 @@ Baichuan2-13B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -首先需要配置baichuan2-13B的推理脚本: tasks/inference/generate_baichuan2_13b_ptd.sh +首先需要配置baichuan2-13B的推理脚本: examples/baichuan2/generate_baichuan2_13b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -418,10 +419,11 @@ TOKENIZER_PATH="your tokenizer directory path" 然后可直接启动generate_baichuan2_13b_ptd.sh ```bash -bash tasks/inference/generate_baichuan2_13b_ptd.sh +bash examples/baichuan2/generate_baichuan2_13b_ptd.sh ``` 推理的示例如下: + ![Inference](../../sources/images/baichuan2/baichuan2_13B_inference.png) ## 评估 @@ -440,7 +442,7 @@ TASK="boolq" ```shell -bash ./tasks/evaluation/evaluate_baichuan2_13B_ptd.sh +bash ./examples/baichuan2/evaluate_baichuan2_13B_ptd.sh ```
diff --git a/examples/baichuan2/README_en.md b/examples/baichuan2/README_en.md index e8a12cc25c8645c5b0ec47b93eab2d3dd2e4d403..d6954cc94a57d66578f7953331e5784f06c6b76c 100644 --- a/examples/baichuan2/README_en.md +++ b/examples/baichuan2/README_en.md @@ -104,7 +104,7 @@ mkdir baichuan2-7B-mt # modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -121,7 +121,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -190,7 +190,7 @@ The performance of Baichuan2-7B in **Ascend NPU** and **Reference**: ## Inference -Config baichuan2-7B inference script: tasks/inference/generate_baichuan2_7b_ptd.sh +Config baichuan2-7B inference script: examples/baichuan2/generate_baichuan2_7b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -199,9 +199,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch baichuan2-7B inference script: tasks/inference/generate_baichuan2_7b_ptd.sh +Launch baichuan2-7B inference script: examples/baichuan2/generate_baichuan2_7b_ptd.sh ```bash -bash tasks/inference/generate_baichuan2_7b_ptd.sh +bash examples/baichuan2/generate_baichuan2_7b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/baichuan2/baichuan2_7B_inference.png) @@ -220,7 +220,7 @@ TASK="boolq" ``` ```shell -bash ./tasks/evaluation/evaluate_baichuan2_13B_ptd.sh +bash ./examples/baichuan2/evaluate_baichuan2_13B_ptd.sh ```
@@ -324,7 +324,7 @@ mkdir baichuan2-13B-mt # modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py \ +python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -342,7 +342,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -405,7 +405,7 @@ The performance of the Baichuan2-13B in **Ascend NPU** and **Reference**: ## Inference -Config baichuan2-13B inference script: tasks/inference/generate_baichuan2_13b_ptd.sh +Config baichuan2-13B inference script: examples/baichuan2/generate_baichuan2_13b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -414,9 +414,9 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch baichuan2-13B inference script: tasks/inference/generate_baichuan2_13b_ptd.sh +Launch baichuan2-13B inference script: examples/baichuan2/generate_baichuan2_13b_ptd.sh ```bash -bash tasks/inference/generate_baichuan2_13b_ptd.sh +bash examples/baichuan2/generate_baichuan2_13b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/baichuan2/baichuan2_13B_inference.png) @@ -435,7 +435,7 @@ TASK="boolq" ``` ```shell -bash ./tasks/evaluation/evaluate_baichuan2_13B_ptd.sh +bash ./examples/baichuan2/evaluate_baichuan2_13B_ptd.sh ```
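Both README variants now start evaluation through the relocated top-level `evaluation.py`. Collecting the BoolQ settings mentioned in this patch into one place, a launch sketch could look like this; parallel sizes, checkpoint and tokenizer arguments, and the contents of `$DISTRIBUTED_ARGS` are placeholders to be taken from the corresponding `examples/baichuan2/evaluate_*.sh` script:

```bash
# Illustrative BoolQ evaluation launch; see the per-model evaluate_*.sh scripts
# for the full argument list (model shape, checkpoint, tokenizer, parallel sizes).
DATA_PATH="./boolq/data/test/"
TASK="boolq"
python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \
    --task-data-path $DATA_PATH \
    --task $TASK \
    --seq-length 4096 \
    --max-new-tokens 1 \
    --evaluation-batch-size 1 \
    --micro-batch-size 1 \
    --seed 42 | tee logs/evaluation.log
```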
diff --git a/tasks/evaluation/evaluate_baichuan2_13B_ptd.sh b/examples/baichuan2/evaluate_baichuan2_13B_ptd.sh similarity index 95% rename from tasks/evaluation/evaluate_baichuan2_13B_ptd.sh rename to examples/baichuan2/evaluate_baichuan2_13B_ptd.sh index 9708a65f288cad8193574d3e9894d09f3977bea4..7be0e6f3212740fcca39a034d4be01e4a1c63e09 100644 --- a/tasks/evaluation/evaluate_baichuan2_13B_ptd.sh +++ b/examples/baichuan2/evaluate_baichuan2_13B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_baichuan2_7B_ptd.sh b/examples/baichuan2/evaluate_baichuan2_7B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_baichuan2_7B_ptd.sh rename to examples/baichuan2/evaluate_baichuan2_7B_ptd.sh index 3cdba8eb2d02e7e16ff1420a05c764d605e61c80..f742d8ac89309f9ab90d3689e1d4bddf770a32e8 100644 --- a/tasks/evaluation/evaluate_baichuan2_7B_ptd.sh +++ b/examples/baichuan2/evaluate_baichuan2_7B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/inference/generate_baichuan2_13b_ptd.sh b/examples/baichuan2/generate_baichuan2_13b_ptd.sh similarity index 94% rename from tasks/inference/generate_baichuan2_13b_ptd.sh rename to examples/baichuan2/generate_baichuan2_13b_ptd.sh index 783edb8164e3371aa3eb591501522913b16747fa..76c9655a945f4db191f7f223e47eb9c3af70f3a6 100644 --- a/tasks/inference/generate_baichuan2_13b_ptd.sh +++ b/examples/baichuan2/generate_baichuan2_13b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ diff --git a/tasks/inference/generate_baichuan2_7b_ptd.sh b/examples/baichuan2/generate_baichuan2_7b_ptd.sh similarity index 94% rename from tasks/inference/generate_baichuan2_7b_ptd.sh rename to examples/baichuan2/generate_baichuan2_7b_ptd.sh index f06b11100cce599ed6dd7d3a7a2c0f2ad5d72594..61927d73eba5ca78a837c648c2a20d14d56e14f2 100644 --- a/tasks/inference/generate_baichuan2_7b_ptd.sh +++ b/examples/baichuan2/generate_baichuan2_7b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ diff --git a/examples/bloom/README.md b/examples/bloom/README.md index 
da3a7a7cb47b97848265d90be19e3c4b46eb43df..09319f46fcd60e01c9c895d21658989521ec17c3 100644 --- a/examples/bloom/README.md +++ b/examples/bloom/README.md @@ -71,7 +71,7 @@ cd .. 将模型权重文件从 HuggingFace权重 格式转化为 Megatron 权重 ***(该场景一般用于使能开源的HuggingFace模型在Megatron上进行训练)*** ```shell -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader loader_bloom_hf \ --saver saver_megatron \ --target-tensor-parallel-size 8 \ @@ -87,7 +87,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -153,7 +153,7 @@ Bloom-7B ## Bloom-7B推理 -首先配置Bloom-7B 推理脚本: tasks/inference/generate_bloom_ptd_7B.sh +首先配置Bloom-7B 推理脚本: examples/bloom/generate_bloom_ptd_7B.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -166,7 +166,7 @@ TOKENIZER_PATH="your tokenizer path" 然后可直接启动generate_bloom_7b_ptd.sh ```bash -bash tasks/inference/generate_bloom_7b_ptd.sh +bash examples/bloom/generate_bloom_7b_ptd.sh ``` 推理示例如下: @@ -174,7 +174,7 @@ bash tasks/inference/generate_bloom_7b_ptd.sh ## Bloom-7B评测 -配置Bloom-7B 评估脚本: tasks/evaluation/evaluate_bloom_7b_ptd.sh +配置Bloom-7B 评估脚本: examples/bloom/evaluate_bloom_7b_ptd.sh ```bash # ascend-toolkit 路径 @@ -191,7 +191,7 @@ TASK="your task" 启动评估 ```bash -bash tasks/evaluation/evaluate_bloom_7B_ptd.sh +bash examples/bloom/evaluate_bloom_7B_ptd.sh ``` MMLU评测得分 @@ -269,7 +269,7 @@ cd .. 将模型权重文件从 HuggingFace权重 格式转化为 Megatron 权重 ***(该场景一般用于使能开源的HuggingFace模型在Megatron上进行训练)*** ```shell -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader loader_bloom_hf \ --saver saver_megatron \ --target-tensor-parallel-size 8 \ @@ -288,7 +288,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -358,7 +358,7 @@ Bloom-176B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -首先配置Bloom-176B 推理脚本: tasks/inference/generate_bloom_ptd_176B.sh +首先配置Bloom-176B 推理脚本: examples/bloom/generate_bloom_ptd_176B.sh bloom 176b的推理需要5机,因此要用上面的 权重转换脚本重新切分,tp=8,pp=5 ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -372,7 +372,7 @@ TOKENIZER_PATH="your tokenizer path" 然后可直接启动generate_bloom_176b_ptd.sh ```bash -bash tasks/inference/generate_bloom_176b_ptd.sh +bash examples/bloom/generate_bloom_176b_ptd.sh ``` 推理示例如下: @@ -381,7 +381,7 @@ bash tasks/inference/generate_bloom_176b_ptd.sh ## 评估 -配置Bloom-176B 评估脚本: tasks/evaluation/evaluate_bloom_176b_ptd.sh +配置Bloom-176B 评估脚本: examples/bloom/evaluate_bloom_176b_ptd.sh ```bash # ascend-toolkit 路径 @@ -398,7 +398,7 @@ TASK="your task" 启动评估 ```bash -bash tasks/evaluation/evaluate_bloom_176B_ptd.sh +bash examples/bloom/evaluate_bloom_176B_ptd.sh ``` 评测得分 diff --git a/examples/bloom/README_en.md b/examples/bloom/README_en.md index dc4389421a33038dcd5a455df28a7724d66f9a32..18444553a20776bf0c2927580df4f56251c3c324 100644 --- a/examples/bloom/README_en.md +++ b/examples/bloom/README_en.md @@ -73,7 +73,7 @@ HuggingFace weights --> Megatron weights ***(This scenario is 
generally used to train open-source HuggingFace models on Megatron)*** ```shell -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader loader_bloom_hf \ --saver saver_megatron \ --target-tensor-parallel-size 8 \ @@ -89,7 +89,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -160,7 +160,7 @@ The performance of Bloom-7B in **Ascend NPU** and **Reference**: ## Inference Bloom-7B -Config Bloom-7B inference script: tasks/inference/generate_bloom_7b_ptd.sh +Config Bloom-7B inference script: examples/bloom/generate_bloom_7b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -169,16 +169,16 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model save ckpt path" TOKENIZER_PATH="your tokenizer path" ``` -Launch Bloom-7B inference script: tasks/inference/generate_bloom_7b_ptd.sh +Launch Bloom-7B inference script: examples/bloom/generate_bloom_7b_ptd.sh ```bash -bash tasks/inference/generate_bloom_7b_ptd.sh +bash examples/bloom/generate_bloom_7b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/bloom/bloom7b-generate.png) ## Evaluation Bloom-7B -Config Bloom-7B evaluation script: tasks/evaluation/evaluate_bloom_7B_ptd.sh +Config Bloom-7B evaluation script: examples/bloom/evaluate_bloom_7B_ptd.sh ```bash source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -193,7 +193,7 @@ TASK="your task" Launch Bloom-7B evaluation script: ```bash -bash tasks/evaluation/evaluate_bloom_7B_ptd.sh +bash examples/bloom/evaluate_bloom_7B_ptd.sh ``` Evaluation results @@ -274,7 +274,7 @@ HuggingFace weights --> Megatron weights ***(This scenario is generally used to train open-source HuggingFace models on Megatron)*** ```shell -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader loader_bloom_hf \ --saver saver_megatron \ --target-tensor-parallel-size 8 \ @@ -291,7 +291,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -361,7 +361,7 @@ The performance of Bloom-176B in **Ascend NPU** and **Reference**: ## Inference Bloom 176B -Config Bloom-176B inference script: tasks/inference/generate_bloom_176b_ptd.sh +Config Bloom-176B inference script: examples/bloom/generate_bloom_176b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -370,18 +370,19 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh CHECKPOINT="your model save ckpt path" TOKENIZER_PATH="your tokenizer path" ``` -Launch Bloom-176B inference script: tasks/inference/generate_bloom_176b_ptd.sh +Launch Bloom-176B inference script: examples/bloom/generate_bloom_176b_ptd.sh + Bloom-176b needs 5 machines to inference, so you need to convert a new model, set tp=8, pp=5 ```bash 
-bash tasks/inference/generate_bloom_176b_ptd.sh +bash examples/bloom/generate_bloom_176b_ptd.sh ``` Some inference samples are as follows: ![Inference](../../sources/images/bloom/bloom176b-generate.png) ## Evaluation Bloom 176B -Config Bloom-176B evaluation script: tasks/evaluation/evaluate_bloom_176B_ptd.sh +Config Bloom-176B evaluation script: examples/bloom/evaluate_bloom_176B_ptd.sh ```bash source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -396,7 +397,7 @@ TASK="your task" Launch Bloom-176B evaluation script: ```bash -bash tasks/evaluation/evaluate_bloom_176B_ptd.sh +bash examples/bloom/evaluate_bloom_176B_ptd.sh ``` Evaluation results diff --git a/tasks/evaluation/evaluate_bloom_176b_ptd.sh b/examples/bloom/evaluate_bloom_176b_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_bloom_176b_ptd.sh rename to examples/bloom/evaluate_bloom_176b_ptd.sh index dcf3f0f223b4cd92bedff7ff81aa2ccbd8b70ec3..b9f235139028618b9dd2c6206141a46b0a52dae5 100644 --- a/tasks/evaluation/evaluate_bloom_176b_ptd.sh +++ b/examples/bloom/evaluate_bloom_176b_ptd.sh @@ -22,7 +22,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 5 \ --num-layers 70 \ diff --git a/tasks/evaluation/evaluate_bloom_7b_ptd.sh b/examples/bloom/evaluate_bloom_7b_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_bloom_7b_ptd.sh rename to examples/bloom/evaluate_bloom_7b_ptd.sh index c6218a0fd3b5e80f8a3827f7dd513b47b19e3f1b..8ed1d49947b539baca2b2e123b5e886f394167ae 100644 --- a/tasks/evaluation/evaluate_bloom_7b_ptd.sh +++ b/examples/bloom/evaluate_bloom_7b_ptd.sh @@ -17,7 +17,7 @@ NODE_RANK=0 NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" # configure generation parameters -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --tensor-model-parallel-size 8 \ diff --git a/tasks/inference/generate_bloom_176b_ptd.sh b/examples/bloom/generate_bloom_176b_ptd.sh similarity index 94% rename from tasks/inference/generate_bloom_176b_ptd.sh rename to examples/bloom/generate_bloom_176b_ptd.sh index 4703a7949b9622523ffcef46c35cebe22abef415..145a81bf3c0ee6a975dacf29c0c3c8e1ca876e95 100644 --- a/tasks/inference/generate_bloom_176b_ptd.sh +++ b/examples/bloom/generate_bloom_176b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 5 \ --num-layers 70 \ diff --git a/tasks/inference/generate_bloom_7b_ptd.sh b/examples/bloom/generate_bloom_7b_ptd.sh similarity index 92% rename from tasks/inference/generate_bloom_7b_ptd.sh rename to examples/bloom/generate_bloom_7b_ptd.sh index 
781d06b0d206a465759bd5dea3536353f1ae37e4..117b81df985ecb05752d402138c2342286ae0280 100644 --- a/tasks/inference/generate_bloom_7b_ptd.sh +++ b/examples/bloom/generate_bloom_7b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --sequence-parallel \ diff --git a/examples/intern/README.md b/examples/intern/README.md index 8cc71e20fe689d30ae396b57195413f532c8ce7f..4ce9264037c50b98fa26e42af9b2788caa9e7216 100644 --- a/examples/intern/README.md +++ b/examples/intern/README.md @@ -122,7 +122,7 @@ cd .. ***(该场景一般用于使能开源的HuggingFace模型在Megatron上进行训练)*** ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -140,7 +140,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -186,9 +186,9 @@ Internlm-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: #### 推理 推理脚本: -tasks/inference/generate_lnternlm_7b_ptd.sh +examples/intern/generate_internlm_7b_ptd.sh ``` -bash ./tasks/inference/generate_lnternlm_7b_ptd.sh +bash ./examples/intern/generate_internlm_7b_ptd.sh ``` 推理举例: ![Internlm-7b-inference](../../sources/images/intern/intern_7B_inference.png) @@ -197,9 +197,9 @@ bash ./tasks/inference/generate_lnternlm_7b_ptd.sh 使用MMLU数据集评估模型。数据集[下载](https://huggingface.co/datasets/cais/mmlu) 评估脚本: -tasks/evaluation/evaluate_internlm_7B_ptd.sh +examples/intern/evaluate_internlm_7B_ptd.sh ``` -bash tasks/evaluation/evaluate_internlm_7B_ptd.sh +bash examples/intern/evaluate_internlm_7B_ptd.sh ``` InternLM-7B在**Ascend NPU**中的评测表现: diff --git a/examples/intern/README_en.md b/examples/intern/README_en.md index d137d7cb3a3aa097d7fb3c6e8d34ec2566d62a2f..7664d4ca5209dfffd907dda3b9e7e1c2385f4dd2 100644 --- a/examples/intern/README_en.md +++ b/examples/intern/README_en.md @@ -122,7 +122,7 @@ In order to adapt to the internlm-7B model, the following script is used to conv ***(This scenario is generally used to train open-source HuggingFace models on Megatron)*** ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -140,7 +140,7 @@ Any Megatron weights with parallel slicing strategy --> Any Megatron weights wit cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -185,9 +185,9 @@ The performance of Internlm-7B in **Ascend NPU** and **Reference**: #### Inference Inference script: -tasks/inference/generate_lnternlm_7b_ptd.sh +examples/intern/generate_lnternlm_7b_ptd.sh ``` -bash 
./tasks/inference/generate_lnternlm_7b_ptd.sh +bash ./examples/intern/generate_lnternlm_7b_ptd.sh ``` Inference case: @@ -197,9 +197,9 @@ Inference case: Evaluating the model using the MMLU dataset. dataset [download](https://huggingface.co/datasets/cais/mmlu) Evaluation script: -tasks/evaluation/evaluate_internlm_7B_ptd.sh +examples/intern/evaluate_internlm_7B_ptd.sh ``` -bash tasks/evaluation/evaluate_internlm_7B_ptd.sh +bash examples/intern/evaluate_internlm_7B_ptd.sh ``` The evaluation performance of LLaMA-7B/13B in **Ascend NPU**: diff --git a/tasks/evaluation/evaluate_internlm_7B_ptd.sh b/examples/intern/evaluate_internlm_7B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_internlm_7B_ptd.sh rename to examples/intern/evaluate_internlm_7B_ptd.sh index 23d32949f34f7dbd88453de6df47ac93a36e34dd..719a1cc7ef3cdb005cdb358786947984c5b1c07a 100644 --- a/tasks/evaluation/evaluate_internlm_7B_ptd.sh +++ b/examples/intern/evaluate_internlm_7B_ptd.sh @@ -22,7 +22,7 @@ NODE_RANK=0 NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" # configure generation parameters -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --add-qkv-bias \ diff --git a/tasks/inference/generate_lnternlm_7b_ptd.sh b/examples/intern/generate_internlm_7b_ptd.sh similarity index 95% rename from tasks/inference/generate_lnternlm_7b_ptd.sh rename to examples/intern/generate_internlm_7b_ptd.sh index c25481bfe0498837bdb70e49909f0e0eba687d5c..632f79d9b18dbbef78c6cf0fb96e45fa75440725 100644 --- a/tasks/inference/generate_lnternlm_7b_ptd.sh +++ b/examples/intern/generate_internlm_7b_ptd.sh @@ -22,7 +22,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --add-qkv-bias \ diff --git a/examples/llama/README.md b/examples/llama/README.md index 7fc40344ef9053af32aca30cfcc0790bfb3eff7b..7c8a69b5be26dbf22718f06d44b25d6db20477e0 100644 --- a/examples/llama/README.md +++ b/examples/llama/README.md @@ -95,7 +95,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -114,7 +114,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 # 单机8卡 mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -131,7 +131,7 @@ LLaMA-7B cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -146,7 +146,7 @@ LLaMA-13B cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python 
tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -305,13 +305,13 @@ SAVE_CHECKPOINT_PATH="your model ckpt save path" LLaMA-7B ```shell -bash tasks/finetune/tune_llama_7b_ptd.sh +bash examples/llama/tune_llama_7b_ptd.sh ``` LLaMA-13B ```shell # 单机8卡 -bash tasks/finetune/tune_llama_13b_ptd.sh +bash examples/llama/tune_llama_13b_ptd.sh ``` ### 性能 @@ -335,7 +335,7 @@ LLaMA-7B/13B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: 我们支持使用 LLaMA-7B 和 LLaMA-13B 进行文本生成的推理。 推理与预训练不同,比如我们需要加载预训练权重和输出样本的长度: -配置LLaMA-7B推理脚本`tasks/inference/generate_llama_7b_ptd.sh`和LLaMA-13B推理脚本`tasks/inference/generate_llama_13b_ptd.sh`。 +配置LLaMA-7B推理脚本`examples/llama/generate_llama_7b_ptd.sh`和LLaMA-13B推理脚本`examples/llama/generate_llama_13b_ptd.sh`。 ```shell # 修改模型权重路径和分词器路径 @@ -345,27 +345,27 @@ TOKENIZER_PATH= LLaMA-7B: ```shell -bash ./tasks/inference/generate_llama_7b_ptd.sh +bash ./examples/llama/generate_llama_7b_ptd.sh ``` LLaMA-13B: ```shell -bash ./tasks/inference/generate_llama_13b_ptd.sh +bash ./examples/llama/generate_llama_13b_ptd.sh ``` -配置LLaMA-7B lora推理脚本`tasks/inference/generate_llama_7b_lora_ptd.sh`和LLaMA-13B lora推理脚本`tasks/inference/generate_llama_13b_lora_ptd.sh`。 +配置LLaMA-7B lora推理脚本`examples/llama/generate_llama_7b_lora_ptd.sh`和LLaMA-13B lora推理脚本`examples/llama/generate_llama_13b_lora_ptd.sh`。 ```bash # 修改lora权重路径 CHECKPOINT_LORA="your lora model directory path" ``` LLaMA-7B: ```shell -bash ./tasks/inference/generate_llama_7b_lora_ptd.sh +bash ./examples/llama/generate_llama_7b_lora_ptd.sh ``` LLaMA-13B: ```shell -bash ./tasks/inference/generate_llama_13b_lora_ptd.sh +bash ./examples/llama/generate_llama_13b_lora_ptd.sh ``` 部分推理样本如下: @@ -384,7 +384,7 @@ LLaMA-13B: 我们使用 Boolq benchmark 来评估我们的模型。Benchmark下载[此处](https://huggingface.co/datasets/boolq)。 -配置LLaMA-7B评估脚本 `tasks/evaluation/evaluate_llama_7B_ptd.sh` 和 LLaMA-13B评估脚本 `tasks/evaluation/evaluate_llama_13B_ptd.sh`: +配置LLaMA-7B评估脚本 `examples/llama/evaluate_llama_7B_ptd.sh` 和 LLaMA-13B评估脚本 `examples/llama/evaluate_llama_13B_ptd.sh`: 修改权重路径, 词表路径和数据集任务路径: ```shell @@ -401,8 +401,8 @@ TASK="boolq" 开始评估: ```shell -bash tasks/evaluation/evaluate_llama_7B_ptd.sh -bash tasks/evaluation/evaluate_llama_13B_ptd.sh +bash examples/llama/evaluate_llama_7B_ptd.sh +bash examples/llama/evaluate_llama_13B_ptd.sh ``` LLaMA-7B/13B在**Ascend NPU**中的评测表现: @@ -511,7 +511,7 @@ python $SCRIPT_PATH \ llama-65B ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -528,7 +528,7 @@ llama-33B cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -543,7 +543,7 @@ llama-65B cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -719,13 +719,13 @@ SAVE_CHECKPOINT_PATH="your model ckpt save path" LLaMA-33B ```shell -bash tasks/finetune/tune_llama_33b_ptd.sh +bash examples/llama/tune_llama_33b_ptd.sh ``` LLaMA-65B ```shell # 双机16卡 -bash 
tasks/finetune/tune_llama_65b_ptd.sh +bash examples/llama/tune_llama_65b_ptd.sh ``` ### 性能 @@ -748,9 +748,9 @@ LLaMA-33B/65B在 **昇腾芯片** 和 **参考芯片** 上的性能对比: 我们支持使用 LLaMA-33B 和 LLaMA-65B 进行文本生成的推理。 推理与预训练不同,比如我们需要加载预训练权重和输出样本的长度: -配置LLaMA-33B推理脚本`tasks/inference/generate_llama_33b_ptd.sh`。 +配置LLaMA-33B推理脚本`examples/llama/generate_llama_33b_ptd.sh`。 -配置LLaMA-65B推理脚本`tasks/inference/generate_llama_65b_ptd.sh`。 +配置LLaMA-65B推理脚本`examples/llama/generate_llama_65b_ptd.sh`。 ```shell # 修改模型权重路径和分词器路径 @@ -760,16 +760,16 @@ TOKENIZER_PATH= LLaMA-33B: ```shell -bash ./tasks/inference/generate_llama_33b_ptd.sh +bash ./examples/llama/generate_llama_33b_ptd.sh ``` LLaMA-65B: ```shell -bash ./tasks/inference/generate_llama_65b_ptd.sh +bash ./examples/llama/generate_llama_65b_ptd.sh ``` -配置LLaMA-33B lora推理脚本`tasks/inference/generate_llama_33B_lora_ptd.sh`。 +配置LLaMA-33B lora推理脚本`examples/llama/generate_llama_33B_lora_ptd.sh`。 -配置LLaMA-65B lora推理脚本`tasks/inference/generate_llama_65B_lora_ptd.sh`。 +配置LLaMA-65B lora推理脚本`examples/llama/generate_llama_65B_lora_ptd.sh`。 ```bash # 修改lora权重路径 @@ -777,12 +777,12 @@ CHECKPOINT_LORA="your lora model directory path" ``` LLaMA-33B: ```shell -bash ./tasks/inference/generate_llama_33b_lora_ptd.sh +bash ./examples/llama/generate_llama_33b_lora_ptd.sh ``` LLaMA-65B: ```shell -bash ./tasks/inference/generate_llama_65b_lora_ptd.sh +bash ./examples/llama/generate_llama_65b_lora_ptd.sh ``` 部分推理样本如下: @@ -799,9 +799,9 @@ LLaMA-65B: 我们使用 Boolq benchmark 来评估我们的模型。Benchmark下载[此处](https://huggingface.co/datasets/boolq)。 -配置LLaMA-33B评估脚本:tasks/evaluation/evaluate_llama_33B_ptd.sh +配置LLaMA-33B评估脚本:examples/llama/evaluate_llama_33B_ptd.sh -配置LLaMA-65B评估脚本:tasks/evaluation/evaluate_llama_65B_ptd.sh +配置LLaMA-65B评估脚本:examples/llama/evaluate_llama_65B_ptd.sh 修改权重路径, 词表路径和数据集任务路径: ```shell @@ -819,9 +819,9 @@ TASK="boolq" ```shell # llama-33B评估 -bash tasks/evaluation/evaluate_llama_33B_ptd.sh +bash examples/llama/evaluate_llama_33B_ptd.sh # llama-65B评估 -bash tasks/evaluation/evaluate_llama_65B_ptd.sh +bash examples/llama/evaluate_llama_65B_ptd.sh ``` LLaMA-33B和LLaMA-65B在**Ascend NPU**中的评测表现: diff --git a/examples/llama/README_en.md b/examples/llama/README_en.md index 765cdccbfdedae53efcb03b9f7a8890714b55889..4ad65869c5d106583ec64c76742656bb65d45d85 100644 --- a/examples/llama/README_en.md +++ b/examples/llama/README_en.md @@ -95,7 +95,7 @@ LLaMA-7B ```shell source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -109,7 +109,7 @@ LLaMA-13B ```shell # Single machine with 8p mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 1 \ @@ -127,7 +127,7 @@ LLaMA-7B cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -142,7 +142,7 @@ LLaMA-13B cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type 
save_huggingface_llama \ @@ -300,13 +300,13 @@ Add the fine-tuning parameter `--finetune` so that fine-tuning starts from the f LLaMA-7B ```shell -bash tasks/finetune/tune_llama_7b_ptd.sh +bash examples/llama/tune_llama_7b_ptd.sh ``` LLaMA-13B ```shell # 8p -bash tasks/finetune/tune_llama_13b_ptd.sh +bash examples/llama/tune_llama_13b_ptd.sh ``` @@ -330,7 +330,7 @@ The performance of LLaMA-7B/13B in **Ascend NPU** and **Reference**: We support ModelLink Inference for text generation with LLaMA-7B and LLaMA-13B. Inference different from pre-training, such as we need to Load pre-training checkpoint and the length of the output samples: -Config LLaMA-7B inference script `tasks/inference/generate_llama_7b_ptd.sh` and LLaMA-13B inference script `tasks/inference/generate_llama_13b_ptd.sh`. +Config LLaMA-7B inference script `examples/llama/generate_llama_7b_ptd.sh` and LLaMA-13B inference script `examples/llama/generate_llama_13b_ptd.sh`. ```shell # modify the model weight path and tokenizer path @@ -340,12 +340,12 @@ TOKENIZER_PATH= LLaMA-7B: ```shell -bash ./tasks/inference/generate_llama_7b_ptd.sh +bash ./examples/llama/generate_llama_7b_ptd.sh ``` LLaMA-13B: ```shell -bash ./tasks/inference/generate_llama_13b_ptd.sh +bash ./examples/llama/generate_llama_13b_ptd.sh ``` Some inference samples are as follows: @@ -363,7 +363,7 @@ LLaMA-13B: We use boolq benchmark to evaluate our model. Benchmark Download [here](https://huggingface.co/datasets/boolq). -Config LLaMA-7B evaluation script `tasks/evaluation/evaluate_llama_7B_ptd.sh` and LLaMA-13B evaluation script `tasks/evaluation/evaluate_llama_13B_ptd.sh`: +Config LLaMA-7B evaluation script `examples/llama/evaluate_llama_7B_ptd.sh` and LLaMA-13B evaluation script `examples/llama/evaluate_llama_13B_ptd.sh`: Modify checkpoint path, vocab path, dataset path and task: @@ -386,8 +386,8 @@ Change the max new tokens: Start evaluation: ```shell -bash tasks/evaluation/evaluate_llama_7B_ptd.sh -bash tasks/evaluation/evaluate_llama_13B_ptd.sh +bash examples/llama/evaluate_llama_7B_ptd.sh +bash examples/llama/evaluate_llama_13B_ptd.sh ``` The evaluation performance of LLaMA-7B/13B in **Ascend NPU**: @@ -486,7 +486,7 @@ llama-33B ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 4 \ @@ -499,7 +499,7 @@ python tools/checkpoint/util.py --model-type GPT \ llama-65B ```shell mkdir model_weights -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -516,7 +516,7 @@ LLaMA-33B cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -531,7 +531,7 @@ LLaMA-65B cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -696,12 +696,12 @@ Add the fine-tuning parameter `--finetune` so that fine-tuning starts from the f Launch llama-33B pre-training script : ModelLink/examples/llama/tune_llama_33B_ptd_32p.sh ```bash 
-bash tasks/finetune/tune_llama_33B_ptd_32p.sh +bash examples/llama/tune_llama_33B_ptd_32p.sh ``` Launch llama-65B pre-training script : ModelLink/examples/llama/tune_llama_65b_ptd.sh ```bash -bash tasks/finetune/tune_llama_65b_ptd.sh +bash examples/llama/tune_llama_65b_ptd.sh ``` Config llama-33B/65B pre-training script for multinode (Launch llama-65B pre-training script on each machine): @@ -732,9 +732,9 @@ The performance of the NPUs in **Ascend** and Reference: We support ModelLink Inference for text generation with LLaMA-33B and LLaMA-65B. Inference different from pre-training, such as we need to Load pre-training checkpoint and the length of the output samples: -Config LLaMA-33B inference script `tasks/inference/generate_llama_33b_ptd.sh`. +Config LLaMA-33B inference script `examples/llama/generate_llama_33b_ptd.sh`. -Config LLaMA-65B inference script `tasks/inference/generate_llama_65b_ptd.sh`. +Config LLaMA-65B inference script `examples/llama/generate_llama_65b_ptd.sh`. ```shell # modify the model weight path and tokenizer path @@ -744,11 +744,11 @@ TOKENIZER_PATH= LLaMA-33B: ```shell -bash ./tasks/inference/generate_llama_33b_ptd.sh +bash ./examples/llama/generate_llama_33b_ptd.sh ``` LLaMA-65B: ```shell -bash ./tasks/inference/generate_llama_65b_ptd.sh +bash ./examples/llama/generate_llama_65b_ptd.sh ``` Some inference samples are as follows: @@ -766,9 +766,9 @@ LLaMA-65B: We use Boolq benchmark to evaluate our model. Benchmark Download [here](https://huggingface.co/datasets/boolq). -Config LLaMA-33B evaluation script: tasks/evaluation/evaluate_llama_33B_ptd.sh +Config LLaMA-33B evaluation script: examples/llama/evaluate_llama_33B_ptd.sh -Config LLaMA-65B evaluation script: tasks/evaluation/evaluate_llama_65B_ptd.sh +Config LLaMA-65B evaluation script: examples/llama/evaluate_llama_65B_ptd.sh Modify checkpoint path, vocab path, dataset path and task: @@ -786,9 +786,9 @@ Change the max new tokens: ```shell # start evaluation # evaluate llama-33B -bash tasks/evaluation/evaluate_llama_33B_ptd.sh +bash examples/llama/evaluate_llama_33B_ptd.sh # evaluate llama-65B -bash tasks/evaluation/evaluate_llama_65B_ptd.sh +bash examples/llama/evaluate_llama_65B_ptd.sh ``` The evaluation performance of LLaMA-7B/13B in **Ascend NPU**: diff --git a/tasks/evaluation/evaluate_llama_13B_ptd.sh b/examples/llama/evaluate_llama_13B_ptd.sh similarity index 91% rename from tasks/evaluation/evaluate_llama_13B_ptd.sh rename to examples/llama/evaluate_llama_13B_ptd.sh index 4807adb34636d32017f754dfc8942be42d602652..da5f11e8ca7122848056cbb7321a9b0834c0d12f 100644 --- a/tasks/evaluation/evaluate_llama_13B_ptd.sh +++ b/examples/llama/evaluate_llama_13B_ptd.sh @@ -22,7 +22,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. 
-python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/evaluation/evaluate_llama_33B_ptd.sh b/examples/llama/evaluate_llama_33B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_llama_33B_ptd.sh rename to examples/llama/evaluate_llama_33B_ptd.sh index 80511ff556d6f47061089884608de41a366e78e2..e07bbbe9cdb07c01c38b818bdb117a762f72c0ab 100644 --- a/tasks/evaluation/evaluate_llama_33B_ptd.sh +++ b/examples/llama/evaluate_llama_33B_ptd.sh @@ -22,7 +22,7 @@ TOKENIZER_PATH="Your tokenizer path" DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/evaluation/evaluate_llama_65B_ptd.sh b/examples/llama/evaluate_llama_65B_ptd.sh similarity index 91% rename from tasks/evaluation/evaluate_llama_65B_ptd.sh rename to examples/llama/evaluate_llama_65B_ptd.sh index 150209df85c5bb5621653db51cf8dc5065be8561..8ed13cc0aa4eb8e9a5f2bf61e9ebddc5b494d848 100644 --- a/tasks/evaluation/evaluate_llama_65B_ptd.sh +++ b/examples/llama/evaluate_llama_65B_ptd.sh @@ -20,7 +20,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/evaluation/evaluate_llama_7B_ptd.sh b/examples/llama/evaluate_llama_7B_ptd.sh similarity index 91% rename from tasks/evaluation/evaluate_llama_7B_ptd.sh rename to examples/llama/evaluate_llama_7B_ptd.sh index 4b9148bcd14ccf7781e1066872a82db9ca342ec5..76dfcd3935e3d1a50bfd2da7d3026b6941a88857 100644 --- a/tasks/evaluation/evaluate_llama_7B_ptd.sh +++ b/examples/llama/evaluate_llama_7B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. 
-python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/inference/generate_llama_13b_lora_ptd.sh b/examples/llama/generate_llama_13b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_13b_lora_ptd.sh rename to examples/llama/generate_llama_13b_lora_ptd.sh index c82a0daf88d90fae7b95c6f4b92880fb3c213abb..93d1b2fd69d1c22d3b56684c57627f50e329d96d 100644 --- a/tasks/inference/generate_llama_13b_lora_ptd.sh +++ b/examples/llama/generate_llama_13b_lora_ptd.sh @@ -20,7 +20,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 1 \ --pipeline-model-parallel-size 8 \ --num-layers 40 \ @@ -53,4 +53,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_13b_lora.log diff --git a/tasks/inference/generate_llama_13b_ptd.sh b/examples/llama/generate_llama_13b_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_13b_ptd.sh rename to examples/llama/generate_llama_13b_ptd.sh index 0b143e4d64fcd40f8df5a4f6dd8f8c164568f61f..47c6bd3f57949f7f83f50dd27ac824d12c6364cd 100644 --- a/tasks/inference/generate_llama_13b_ptd.sh +++ b/examples/llama/generate_llama_13b_ptd.sh @@ -19,7 +19,7 @@ NPUS_PER_NODE=4 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 2 \ --pipeline-model-parallel-size 2 \ --num-layers 40 \ diff --git a/tasks/inference/generate_llama_33b_lora_ptd.sh b/examples/llama/generate_llama_33b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_33b_lora_ptd.sh rename to examples/llama/generate_llama_33b_lora_ptd.sh index 46b00020d0f2ae4ce1a4ff360cfe143007677fbe..bfcb62c18e2212df4aa04c3c5934fefae4e70558 100644 --- a/tasks/inference/generate_llama_33b_lora_ptd.sh +++ b/examples/llama/generate_llama_33b_lora_ptd.sh @@ -26,7 +26,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_alpaca.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 4 \ --pipeline-model-parallel-size 2 \ --num-layers 60 \ @@ -59,4 +59,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-r 16 \ --lora-alpha 32 \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_33b_lora.log diff --git a/tasks/inference/generate_llama_33b_ptd.sh b/examples/llama/generate_llama_33b_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_33b_ptd.sh 
rename to examples/llama/generate_llama_33b_ptd.sh index 42df9a98240a6e079375b7bed6a5eb5134f7864f..36dc704251dee107e4fb5bb6be53fda5b883ccc2 100644 --- a/tasks/inference/generate_llama_33b_ptd.sh +++ b/examples/llama/generate_llama_33b_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 4 \ --pipeline-model-parallel-size 2 \ --num-layers 60 \ diff --git a/tasks/inference/generate_llama_65b_lora_ptd.sh b/examples/llama/generate_llama_65b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama_65b_lora_ptd.sh rename to examples/llama/generate_llama_65b_lora_ptd.sh index 4cf0d7230602ff27d241d8b1836ffca6bf0a0da8..b61efd8b1ab2dd04f1677bbcb6dfc6c8a23e73e2 100644 --- a/tasks/inference/generate_llama_65b_lora_ptd.sh +++ b/examples/llama/generate_llama_65b_lora_ptd.sh @@ -20,7 +20,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 2 \ --num-layers 80 \ @@ -53,4 +53,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_65b_lora.log diff --git a/tasks/inference/generate_llama_65b_ptd.sh b/examples/llama/generate_llama_65b_ptd.sh similarity index 93% rename from tasks/inference/generate_llama_65b_ptd.sh rename to examples/llama/generate_llama_65b_ptd.sh index 1a1ec0c3ca6f34a2c9e26972402d84807132e5ac..423a7a400c88b6fec860dedbc2daa4347df2bf19 100644 --- a/tasks/inference/generate_llama_65b_ptd.sh +++ b/examples/llama/generate_llama_65b_ptd.sh @@ -19,7 +19,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 80 \ @@ -48,4 +48,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --no-gradient-accumulation-fusion \ --exit-on-missing-checkpoint \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_65b.log diff --git a/tasks/inference/generate_llama_7b_lora_ptd.sh b/examples/llama/generate_llama_7b_lora_ptd.sh similarity index 92% rename from tasks/inference/generate_llama_7b_lora_ptd.sh rename to examples/llama/generate_llama_7b_lora_ptd.sh index 66b8f52c73b959c89c495b5175242f06d4b4b0bc..3fde0684402603d52d595310d56130467387258d 100644 --- a/tasks/inference/generate_llama_7b_lora_ptd.sh +++ b/examples/llama/generate_llama_7b_lora_ptd.sh @@ -20,7 +20,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK
--master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 1 \ --pipeline-model-parallel-size 8 \ --num-layers 32 \ @@ -53,4 +53,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama_7b_lora.log \ No newline at end of file diff --git a/tasks/inference/generate_llama_7b_ptd.sh b/examples/llama/generate_llama_7b_ptd.sh similarity index 92% rename from tasks/inference/generate_llama_7b_ptd.sh rename to examples/llama/generate_llama_7b_ptd.sh index ff91d88d959e2281ae035e893e48fb5c6ae76735..96a52d90fd7a845f284be4c815a13093f007d1f5 100644 --- a/tasks/inference/generate_llama_7b_ptd.sh +++ b/examples/llama/generate_llama_7b_ptd.sh @@ -19,7 +19,7 @@ NPUS_PER_NODE=4 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 2 \ --pipeline-model-parallel-size 2 \ --num-layers 32 \ diff --git a/tasks/finetune/tune_llama_13b_ptd.sh b/examples/llama/tune_llama_13b_ptd.sh similarity index 100% rename from tasks/finetune/tune_llama_13b_ptd.sh rename to examples/llama/tune_llama_13b_ptd.sh diff --git a/tasks/finetune/tune_llama_33b_ptd.sh b/examples/llama/tune_llama_33b_ptd.sh similarity index 96% rename from tasks/finetune/tune_llama_33b_ptd.sh rename to examples/llama/tune_llama_33b_ptd.sh index 6f8afbd988c788e90f5079845d46f9ddd551e757..daf0d758e83a272999899ef82fb320f954a37408 100644 --- a/tasks/finetune/tune_llama_33b_ptd.sh +++ b/examples/llama/tune_llama_33b_ptd.sh @@ -47,10 +47,10 @@ GPT_ARGS=" --seq-length 2048 \ --max-position-embeddings 2048 \ --micro-batch-size 2 \ - --global-batch-size 16 \ + --global-batch-size 128 \ --make-vocab-size-divisible-by 1 \ --lr 1.5e-4 \ - --train-iters 200 \ + --train-iters 2000 \ --lr-decay-style cosine \ --untie-embeddings-and-output-weights \ --disable-bias-linear \ diff --git a/tasks/finetune/tune_llama_65b_ptd.sh b/examples/llama/tune_llama_65b_ptd.sh similarity index 100% rename from tasks/finetune/tune_llama_65b_ptd.sh rename to examples/llama/tune_llama_65b_ptd.sh diff --git a/tasks/finetune/tune_llama_7b_ptd.sh b/examples/llama/tune_llama_7b_ptd.sh similarity index 100% rename from tasks/finetune/tune_llama_7b_ptd.sh rename to examples/llama/tune_llama_7b_ptd.sh diff --git a/examples/llama2/README.md b/examples/llama2/README.md index 62c323ab5569888a671a7f92ebfb29f163a8a0cd..7b885bb65a09b5a64fe35159f78b39775a7dd046 100755 --- a/examples/llama2/README.md +++ b/examples/llama2/README.md @@ -108,7 +108,7 @@ LLAMA2-7B 训练的硬件配置: source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -125,7 +125,7 @@ LLAMA2-7B 训练的硬件配置: cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python 
tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -232,10 +232,10 @@ python tools/checkpoint/util.py --model-type GPT \ --lora-modules-to-save word_embeddings output_layer \ ``` - 启动Lora微调脚本: tasks/finetune/tune_llama2_7b_ptd.sh + 启动Lora微调脚本: examples/llama2/tune_llama2_7b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_7b_ptd.sh + bash examples/llama2/tune_llama2_7b_ptd.sh ``` ### 性能 @@ -252,7 +252,7 @@ LLaMA2-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理-7B -配置llama2-7B 推理脚本: tasks/inference/generate_llama2_7b_ptd.sh +配置llama2-7B 推理脚本: examples/llama2/generate_llama2_7b_ptd.sh ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -264,7 +264,7 @@ TOKENIZER_PATH="your tokenizer directory path" TOKENIZER_MODEL="your tokenizer.model file path" ``` -配置 LLaMA2-7B lora推理脚本: tasks/inference/generate_llama2_7b_lora_ptd.sh +配置 LLaMA2-7B lora推理脚本: examples/llama2/generate_llama2_7b_lora_ptd.sh ```bash # 修改lora权重路径 @@ -274,12 +274,12 @@ CHECKPOINT_LORA="your lora model directory path" 启动llama2-7B 推理脚本 ```bash -bash tasks/inference/generate_llama2_7b_ptd.sh +bash examples/llama2/generate_llama2_7b_ptd.sh ``` 启动llama2-7B lora推理脚本 ```bash -bash tasks/inference/generate_llama2_7b_lora_ptd.sh +bash examples/llama2/generate_llama2_7b_lora_ptd.sh ``` 推理的示例如下: @@ -288,7 +288,7 @@ bash tasks/inference/generate_llama2_7b_lora_ptd.sh ## 评估-7B 使用 MMLU数据集评估模型. 数据集下载路径 [这里](https://huggingface.co/datasets/cais/mmlu). -配置llama2-7B 评估脚本: tasks/evaluation/evaluate_llama2_7B_ptd.sh +配置llama2-7B 评估脚本: examples/llama2/evaluate_llama2_7B_ptd.sh ```bash # ascend-toolkit 路径 @@ -305,7 +305,7 @@ TASK="mmlu" 启动评估 ```bash -bash tasks/evaluation/evaluate_llama2_7B_ptd.sh +bash examples/llama2/evaluate_llama2_7B_ptd.sh ``` 评估结果如下 ```text @@ -435,7 +435,7 @@ LLaMA2-13B 训练的硬件配置: source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -450,7 +450,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -557,10 +557,10 @@ python tools/checkpoint/util.py --model-type GPT \ --lora-modules-to-save word_embeddings output_layer \ ``` - 启动Lora微调脚本: tasks/finetune/tune_llama2_13b_ptd.sh + 启动Lora微调脚本: examples/llama2/tune_llama2_13b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_13b_ptd.sh + bash examples/llama2/tune_llama2_13b_ptd.sh ``` ### 性能 @@ -581,14 +581,14 @@ LLaMA2-13B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: 我们在Llama2 13B中支持推理来生成文本。 推理不同于预训练,比如我们需要加载预训练检查点和输出样本的长度: -配置 LLaMA2-13B 推理脚本: tasks/inference/generate_llama2_13b_ptd.sh +配置 LLaMA2-13B 推理脚本: examples/llama2/generate_llama2_13b_ptd.sh ```shell # 修改模型权重路径以及词表路径 CHECKPOINT=./llama2-13b-tp8-pp1/ TOKENIZER_PATH=./llama2-13b-hf/ ``` -配置 LLaMA2-13B lora推理脚本: tasks/inference/generate_llama2_13b_lora_ptd.sh +配置 LLaMA2-13B lora推理脚本: examples/llama2/generate_llama2_13b_lora_ptd.sh ```bash # 修改lora权重路径 @@ -596,11 +596,11 @@ CHECKPOINT_LORA="your lora model directory path" ``` 启动推理脚本 ```shell -bash ./tasks/inference/generate_llama2_13b_ptd.sh +bash ./examples/llama2/generate_llama2_13b_ptd.sh ``` 启动lora推理脚本 ```shell -bash 
./tasks/inference/generate_llama2_13b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_13b_lora_ptd.sh ``` 推理结果示例如下: ![llama2-13B-generate.png](../../sources/images/llama2/llama2-13B-generate.png) @@ -617,7 +617,7 @@ bash ./tasks/inference/generate_llama2_13b_lora_ptd.sh ``` ```shell -bash tasks/evaluation/evaluate_llama2_13B_ptd.sh +bash examples/llama2/evaluate_llama2_13B_ptd.sh ```
@@ -761,7 +761,7 @@ pip install -r requirements.txt source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 - python tools/checkpoint/util.py \ + python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -797,7 +797,7 @@ pip install -r requirements.txt cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -812,7 +812,7 @@ python tools/checkpoint/util.py --model-type GPT \ cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py --model-type GPT \ +python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -957,16 +957,16 @@ python tools/checkpoint/util.py --model-type GPT \ --lora-modules-to-save word_embeddings output_layer \ ``` - 启动llama2-34B Lora微调脚本: tasks/finetune/tune_llama2_34b_ptd.sh + 启动llama2-34B Lora微调脚本: examples/llama2/tune_llama2_34b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_34b_ptd.sh + bash examples/llama2/tune_llama2_34b_ptd.sh ``` - 启动llama2-70B Lora微调脚本: tasks/finetune/tune_llama2_70b_ptd.sh + 启动llama2-70B Lora微调脚本: examples/llama2/tune_llama2_70b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_70b_ptd.sh + bash examples/llama2/tune_llama2_70b_ptd.sh ``` ### 性能-2 @@ -990,9 +990,9 @@ LLaMA2-34B/70B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比 配置推理脚本 -LLaMA2-34B:`tasks/inference/generate_llama2_34B_ptd.sh`。 +LLaMA2-34B:`examples/llama2/generate_llama2_34B_ptd.sh`。 -LLaMA2-70B:`tasks/inference/generate_llama2_70b_ptd.sh`。 +LLaMA2-70B:`examples/llama2/generate_llama2_70b_ptd.sh`。 ```shell # 修改模型权重路径和分词器路径 @@ -1002,9 +1002,9 @@ TOKENIZER_PATH= 配置lora推理脚本 -LLaMA2-34B:`tasks/inference/generate_llama2_34b_lora_ptd.sh`。 +LLaMA2-34B:`examples/llama2/generate_llama2_34b_lora_ptd.sh`。 -LLaMA2-70B:`tasks/inference/generate_llama2_70b_lora_ptd.sh`。 +LLaMA2-70B:`examples/llama2/generate_llama2_70b_lora_ptd.sh`。 ```bash # 修改lora权重路径 @@ -1013,19 +1013,19 @@ CHECKPOINT_LORA="your lora model directory path" LLaMA2-34B启动推理: ```shell -bash ./tasks/inference/generate_llama2_34B_ptd.sh +bash ./examples/llama2/generate_llama2_34B_ptd.sh ``` LLaMA2-34B启动lora推理: ```shell -bash ./tasks/inference/generate_llama2_34b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_34b_lora_ptd.sh ``` LLaMA2-70B启动推理: ```shell -bash ./tasks/inference/generate_llama2_70b_ptd.sh +bash ./examples/llama2/generate_llama2_70b_ptd.sh ``` LLaMA2-70B启动lora推理: ```shell -bash ./tasks/inference/generate_llama2_70b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_70b_lora_ptd.sh ``` LLaMA2-34B推理样例: @@ -1042,9 +1042,9 @@ BoolQ数据集评估样例. 数据集[here](https://huggingface.co/datasets/bool 配置评估脚本: -LLaMA2-34B:`tasks/evaluation/evaluate_llama2_34B_ptd.sh`. +LLaMA2-34B:`examples/llama2/evaluate_llama2_34B_ptd.sh`. -LLaMA2-70B:`tasks/evaluation/evaluate_llama2_70B_ptd.sh`. +LLaMA2-70B:`examples/llama2/evaluate_llama2_70B_ptd.sh`. 
```shell # 修改模型权重路径和分词器路径 @@ -1054,11 +1054,11 @@ TOKENIZER_PATH= LLaMA2-34B评估: ```shell -bash tasks/evaluation/evaluate_llama2_34B_ptd.sh +bash examples/llama2/evaluate_llama2_34B_ptd.sh ``` LLaMA2-70B评估: ```shell -bash tasks/evaluation/evaluate_llama2_70B_ptd.sh +bash examples/llama2/evaluate_llama2_70B_ptd.sh ``` BoolQ 数据集评估结果: diff --git a/examples/llama2/README_en.md b/examples/llama2/README_en.md index de54cbda7d33c9dab6d4a956d54ad6cd654d5ce4..16ff99950f46825ebc262d4c3cf59b1c2224fe36 100644 --- a/examples/llama2/README_en.md +++ b/examples/llama2/README_en.md @@ -115,7 +115,7 @@ Here's a hardware summary of pre-training LLAMA2-7B: source /usr/local/Ascend/ascend-toolkit/set_env.sh # convert to ptd weights - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -130,7 +130,7 @@ Here's a hardware summary of pre-training LLAMA2-7B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -233,10 +233,10 @@ Here's a hardware summary of pre-training LLAMA2-7B: --lora-load ${LORA_CHECKPOINT} \ ``` - Launch LLAMA2-7B lora fine tune script: tasks/finetune/tune_llama2_7b_ptd.sh + Launch LLAMA2-7B lora fine tune script: examples/llama2/tune_llama2_7b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_7b_ptd.sh + bash examples/llama2/tune_llama2_7b_ptd.sh ``` ### Performance @@ -253,7 +253,7 @@ The performance of LLaMA2-7B in **Ascend NPU** and **Reference**: ## Inference-7B -Config llama2-7B inference script: tasks/inference/generate_llama2_7b_ptd.sh +Config llama2-7B inference script: examples/llama2/generate_llama2_7b_ptd.sh ```bash # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -262,16 +262,16 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh TOKENIZER_PATH=./llama2-7b-hf/ #tokenizer path CHECKPOINT=./llama2-7b-tp8pp1 #model path ``` -Config llama2-7B lora inference script: tasks/inference/generate_llama2_7b_lora_ptd.sh +Config llama2-7B lora inference script: examples/llama2/generate_llama2_7b_lora_ptd.sh ```bash # modify lora model path CHECKPOINT_LORA="your lora model directory path" ``` -Launch llama2-7B inference script: tasks/inference/generate_llama2_7b_ptd.sh +Launch llama2-7B inference script: examples/llama2/generate_llama2_7b_ptd.sh ```bash bash examples/llama2/generate_llama2_7b_ptd.sh ``` -Launch llama2-7B lora inference script: tasks/inference/generate_llama2_7b_lora_ptd.sh +Launch llama2-7B lora inference script: examples/llama2/generate_llama2_7b_lora_ptd.sh ```bash bash examples/llama2/generate_llama2_7b_lora_ptd.sh ``` @@ -280,7 +280,7 @@ Some inference samples are as follows: ## Evaluation-7B We use MMLU benchmark to evaluate our model. Benchmark Download [here](https://huggingface.co/datasets/cais/mmlu).
-Config llama2-7B evaluation script: tasks/evaluation/evaluate_llama2_7B_ptd.sh +Config llama2-7B evaluation script: examples/llama2/evaluate_llama2_7B_ptd.sh ```bash source /usr/local/Ascend/ascend-toolkit/set_env.sh @@ -295,7 +295,7 @@ TASK="mmlu" Launch llama2-7B evaluation script: ```bash -bash tasks/evaluation/evaluate_llama2_7B_ptd.sh +bash examples/llama2/evaluate_llama2_7B_ptd.sh ``` Evaluation results @@ -425,7 +425,7 @@ Here's a hardware summary of pre-training LLaMA2-13B: source /usr/local/Ascend/ascend-toolkit/set_env.sh # convert weights - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -441,7 +441,7 @@ Here's a hardware summary of pre-training LLaMA2-13B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -539,10 +539,10 @@ Here's a hardware summary of pre-training LLaMA2-13B: ``` - Launch LLAMA2-13B lora fine tune script: tasks/finetune/tune_llama2_13b_ptd.sh + Launch LLAMA2-13B lora fine tune script: examples/llama2/tune_llama2_13b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_13b_ptd.sh + bash examples/llama2/tune_llama2_13b_ptd.sh ``` ### Performance @@ -571,7 +571,7 @@ CHECKPOINT=./llama2-13b-tp8-pp1/ TOKENIZER_PATH=./llama2-13b-hf/ ``` -Config Llama2-13B lora inference script: tasks/inference/generate_llama2_13b_lora_ptd.sh +Config Llama2-13B lora inference script: examples/llama2/generate_llama2_13b_lora_ptd.sh ```bash # modify lora model directory path @@ -580,11 +580,11 @@ CHECKPOINT_LORA="your lora model directory path" Launch Llama2-13B inference script. ```shell -bash ./tasks/inference/generate_llama2_13b_ptd.sh +bash examples/llama2/generate_llama2_13b_ptd.sh ``` Launch Llama2-13B lora inference script. ```shell -bash ./tasks/inference/generate_llama2_13b_lora_ptd.sh +bash examples/llama2/generate_llama2_13b_lora_ptd.sh ``` Some inference samples are as follows: ![llama2-13B-generate.png](../../sources/images/llama2/llama2-13B-generate.png) @@ -601,7 +601,7 @@ We use boolq benchmark to evaluate our model. Benchmark Download [here](https:// ``` ```shell -bash tasks/evaluation/evaluate_llama2_13B_ptd.sh +bash examples/llama2/evaluate_llama2_13B_ptd.sh ```
@@ -742,7 +742,7 @@ pip install -r requirements.txt source /usr/local/Ascend/ascend-toolkit/set_env.sh # convert to megatron weights - python tools/checkpoint/util.py \ + python tools/checkpoint/convert_ckpt.py \ --model-type GPT \ --loader llama2_hf \ --saver megatron \ @@ -758,7 +758,7 @@ pip install -r requirements.txt source /usr/local/Ascend/ascend-toolkit/set_env.sh # convert to megatron weights - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader llama2_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -775,7 +775,7 @@ pip install -r requirements.txt cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -789,7 +789,7 @@ pip install -r requirements.txt cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -931,16 +931,16 @@ pip install -r requirements.txt --lora-load ${LORA_CHECKPOINT} \ ``` - Launch LLAMA2-34B lora fine tune script: tasks/finetune/tune_llama2_34b_ptd.sh + Launch LLAMA2-34B lora fine tune script: examples/llama2/tune_llama2_34b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_34b_ptd.sh + bash examples/llama2/tune_llama2_34b_ptd.sh ``` - Launch LLAMA2-70B lora fine tune script: tasks/finetune/tune_llama2_70b_ptd.sh + Launch LLAMA2-70B lora fine tune script: examples/llama2/tune_llama2_70b_ptd.sh ```shell - bash tasks/finetune/tune_llama2_70b_ptd.sh + bash examples/llama2/tune_llama2_70b_ptd.sh ``` ### Performance-2 @@ -965,9 +965,9 @@ Models could generate with 8 NPUs, for example: Config inference script: -LLaMA2-34B:`tasks/inference/generate_llama2_34B_ptd.sh`. +LLaMA2-34B:`examples/llama2/generate_llama2_34B_ptd.sh`. -LLaMA2-70B:`tasks/inference/generate_llama2_70b_ptd.sh`. +LLaMA2-70B:`examples/llama2/generate_llama2_70b_ptd.sh`. ```shell # Modify checkpoint path and vocabfile path. @@ -983,19 +983,19 @@ CHECKPOINT_LORA="your lora model directory path" Launch LLaMA2-34B inference: ```shell -bash ./tasks/inference/generate_llama2_34B_ptd.sh +bash ./examples/llama2/generate_llama2_34B_ptd.sh ``` Launch LLaMA2-34B lora inference: ```shell -bash ./tasks/inference/generate_llama2_34b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_34b_lora_ptd.sh ``` Launch LLaMA2-70B inference: ```shell -bash ./tasks/llama2/generate_llama2_70b_ptd.sh +bash ./examples/llama2/generate_llama2_70b_ptd.sh ``` Launch LLaMA2-70B lora inference: ```shell -bash ./tasks/llama2/generate_llama2_70b_lora_ptd.sh +bash ./examples/llama2/generate_llama2_70b_lora_ptd.sh ``` Some inference samples of LLaMA2-34B are as follows: @@ -1012,9 +1012,9 @@ Download dev part[here](https://storage.googleapis.com/boolq/dev.jsonl) and put Config evaluation script: -LLaMA2-34B:`tasks/evaluation/evaluate_llama2_34B_ptd.sh`. +LLaMA2-34B:`examples/llama2/evaluate_llama2_34B_ptd.sh`. -LLaMA2-70B:`tasks/evaluation/evaluate_llama2_70B_ptd.sh`. +LLaMA2-70B:`examples/llama2/evaluate_llama2_70B_ptd.sh`. ```shell # Modify checkpoint path and vocabfile path. 
@@ -1024,11 +1024,11 @@ TOKENIZER_PATH= Launch LLaMA2-34B evaluation: ```shell -bash tasks/evaluation/evaluate_llama2_34B_ptd.sh +bash examples/llama2/evaluate_llama2_34B_ptd.sh ``` Launch LLaMA2-70B evaluation: ```shell -bash tasks/evaluation/evaluate_llama2_70B_ptd.sh +bash examples/llama2/evaluate_llama2_70B_ptd.sh ``` Evaluation results with BoolQ dataset: diff --git a/tasks/evaluation/evaluate_llama2_13B_ptd.sh b/examples/llama2/evaluate_llama2_13B_ptd.sh similarity index 94% rename from tasks/evaluation/evaluate_llama2_13B_ptd.sh rename to examples/llama2/evaluate_llama2_13B_ptd.sh index 285c79ae30a12b8460f61268d4a43acfb8fef36b..0244458ecacfe4702cd486ed74c970411800f3a1 100644 --- a/tasks/evaluation/evaluate_llama2_13B_ptd.sh +++ b/examples/llama2/evaluate_llama2_13B_ptd.sh @@ -22,7 +22,7 @@ TOKENIZER_PATH="Your tokenizer path" DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_llama2_34B_ptd.sh b/examples/llama2/evaluate_llama2_34B_ptd.sh similarity index 95% rename from tasks/evaluation/evaluate_llama2_34B_ptd.sh rename to examples/llama2/evaluate_llama2_34B_ptd.sh index d4749e1148f320efad7194e370d5047a3b0ce021..dbd2f8227872c4c92d3f3cd0f185373d05323c8e 100644 --- a/tasks/evaluation/evaluate_llama2_34B_ptd.sh +++ b/examples/llama2/evaluate_llama2_34B_ptd.sh @@ -22,7 +22,7 @@ TOKENIZER_PATH="Your tokenizer path" DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_llama2_70B_ptd.sh b/examples/llama2/evaluate_llama2_70B_ptd.sh similarity index 91% rename from tasks/evaluation/evaluate_llama2_70B_ptd.sh rename to examples/llama2/evaluate_llama2_70B_ptd.sh index a9a28f3eb1cec64af41b0fa1555fa9d257a4c3f9..ffcafd9385ad1d330699cd52685aa9cfd1a7a6ff 100644 --- a/tasks/evaluation/evaluate_llama2_70B_ptd.sh +++ b/examples/llama2/evaluate_llama2_70B_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./boolq/data/test/" TASK="boolq" # Different task needs different max_new_tokens value, please follow the instruction in readme. 
-python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/evaluation/evaluate_llama2_7B_ptd.sh b/examples/llama2/evaluate_llama2_7B_ptd.sh similarity index 95% rename from tasks/evaluation/evaluate_llama2_7B_ptd.sh rename to examples/llama2/evaluate_llama2_7B_ptd.sh index cbab4e2fce0f39fdbede3ed2ddea4ce508e9f008..606e7b0687d9a87c08896b851418cacee6b0110c 100644 --- a/tasks/evaluation/evaluate_llama2_7B_ptd.sh +++ b/examples/llama2/evaluate_llama2_7B_ptd.sh @@ -22,7 +22,7 @@ NODE_RANK=0 NPUS_PER_NODE=1 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" # configure generation parameters -python -m torch.distributed.launch $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 4096 \ diff --git a/tasks/inference/generate_llama2_13b_lora_ptd.sh b/examples/llama2/generate_llama2_13b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_13b_lora_ptd.sh rename to examples/llama2/generate_llama2_13b_lora_ptd.sh index c26ab51893dc538f80f58f568c8d08cbd4cc7971..2ae5c480fde7041c665986a178846c4166ca6421 100644 --- a/tasks/inference/generate_llama2_13b_lora_ptd.sh +++ b/examples/llama2/generate_llama2_13b_lora_ptd.sh @@ -26,7 +26,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_alpaca.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ @@ -59,4 +59,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-r 16 \ --lora-alpha 32 \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama2_13b_lora.log diff --git a/tasks/inference/generate_llama2_13b_ptd.sh b/examples/llama2/generate_llama2_13b_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_13b_ptd.sh rename to examples/llama2/generate_llama2_13b_ptd.sh index 4f2040673671a285ae97bf07f8f7d4e130f8f7bc..0dbe63c9eb65650e97aed7d4c35b2ceb6077e6f5 100644 --- a/tasks/inference/generate_llama2_13b_ptd.sh +++ b/examples/llama2/generate_llama2_13b_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ diff --git a/tasks/inference/generate_llama2_34B_ptd.sh b/examples/llama2/generate_llama2_34B_ptd.sh similarity index 95% rename from tasks/inference/generate_llama2_34B_ptd.sh rename to examples/llama2/generate_llama2_34B_ptd.sh index ffacd7e2f4d817db163db5aef66d5e3df66dc43f..c89c331b5464dc91ab8fe8caac530ef787928d45 100644 --- a/tasks/inference/generate_llama2_34B_ptd.sh +++ b/examples/llama2/generate_llama2_34B_ptd.sh @@ -21,7 
+21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 48 \ diff --git a/tasks/inference/generate_llama2_34b_lora_ptd.sh b/examples/llama2/generate_llama2_34b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_34b_lora_ptd.sh rename to examples/llama2/generate_llama2_34b_lora_ptd.sh index 548ffc485828ccf61e08ec7285eab73a2666deff..2884896b00b99eba5ff17019b89aaf59c2692a08 100644 --- a/tasks/inference/generate_llama2_34b_lora_ptd.sh +++ b/examples/llama2/generate_llama2_34b_lora_ptd.sh @@ -26,7 +26,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_alpaca.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 48 \ @@ -61,4 +61,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --make-vocab-size-divisible-by 1 \ --group-query-attention \ --num-query-groups 8 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama2_34b_lora.log diff --git a/tasks/inference/generate_llama2_70b_lora_ptd.sh b/examples/llama2/generate_llama2_70b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_70b_lora_ptd.sh rename to examples/llama2/generate_llama2_70b_lora_ptd.sh index a5f53db5b39a19068bbaa6b8ee9b8721a585b6eb..a1de86d11c6052110cac5f84988d4f906557be03 100644 --- a/tasks/inference/generate_llama2_70b_lora_ptd.sh +++ b/examples/llama2/generate_llama2_70b_lora_ptd.sh @@ -20,7 +20,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 2 \ --num-layers 80 \ @@ -55,4 +55,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama2_70b_lora.log diff --git a/tasks/inference/generate_llama2_70b_ptd.sh b/examples/llama2/generate_llama2_70b_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_70b_ptd.sh rename to examples/llama2/generate_llama2_70b_ptd.sh index 0f5ba74a231f94f182c5ea50237c3ab56adbca25..18af322850d71f3973b3f321ee319177b8b7efc0 100644 --- a/tasks/inference/generate_llama2_70b_ptd.sh +++ b/examples/llama2/generate_llama2_70b_ptd.sh @@ -19,7 +19,7 @@ NPUS_PER_NODE=8 DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ 
--tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 80 \ diff --git a/tasks/inference/generate_llama2_7b_lora_ptd.sh b/examples/llama2/generate_llama2_7b_lora_ptd.sh similarity index 94% rename from tasks/inference/generate_llama2_7b_lora_ptd.sh rename to examples/llama2/generate_llama2_7b_lora_ptd.sh index 3aaf738edb70511ddf5be6cc9ae3d31fb617ae6a..e2e723729b916ffd9c53be055303f2a2b7dd6770 100644 --- a/tasks/inference/generate_llama2_7b_lora_ptd.sh +++ b/examples/llama2/generate_llama2_7b_lora_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_alpaca.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ @@ -55,4 +55,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference --lora-alpha 32 \ --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ --make-vocab-size-divisible-by 1 \ + --inference-prompt-type 'alpaca' \ | tee logs/generate_llama2_7b_lora.log diff --git a/tasks/inference/generate_llama2_7b_ptd.sh b/examples/llama2/generate_llama2_7b_ptd.sh similarity index 95% rename from tasks/inference/generate_llama2_7b_ptd.sh rename to examples/llama2/generate_llama2_7b_ptd.sh index 3e4c5b5da140bd1bf80f7377e077ee0f8caec232..23405e7ae3bca0304d82b7df830d35b481ef3333 100644 --- a/tasks/inference/generate_llama2_7b_ptd.sh +++ b/examples/llama2/generate_llama2_7b_ptd.sh @@ -21,7 +21,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +python -m torch.distributed.launch $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ diff --git a/tasks/finetune/tune_llama2_13b_ptd.sh b/examples/llama2/tune_llama2_13b_ptd.sh similarity index 96% rename from tasks/finetune/tune_llama2_13b_ptd.sh rename to examples/llama2/tune_llama2_13b_ptd.sh index c3bde4a2c7cb12f59c80feb89638c1eac0fa7b94..0b78c404fb5ad5d210cfd7a5162625fcaa1edbc7 100644 --- a/tasks/finetune/tune_llama2_13b_ptd.sh +++ b/examples/llama2/tune_llama2_13b_ptd.sh @@ -47,10 +47,10 @@ GPT_ARGS=" --seq-length 4096 \ --max-position-embeddings 4096 \ --micro-batch-size 2 \ - --global-batch-size 16 \ + --global-batch-size 128 \ --make-vocab-size-divisible-by 1 \ --lr 1e-6 \ - --train-iters 200 \ + --train-iters 2000 \ --lr-decay-style cosine \ --untie-embeddings-and-output-weights \ --disable-bias-linear \ diff --git a/tasks/finetune/tune_llama2_34b_ptd.sh b/examples/llama2/tune_llama2_34b_ptd.sh similarity index 96% rename from tasks/finetune/tune_llama2_34b_ptd.sh rename to examples/llama2/tune_llama2_34b_ptd.sh index 2e69a9265ab0aae82046172a6507bf9f693e5fba..37c3a5006683bdda8b4174499173754c58299b63 100644 --- a/tasks/finetune/tune_llama2_34b_ptd.sh +++ b/examples/llama2/tune_llama2_34b_ptd.sh @@ -47,10 +47,10 @@ GPT_ARGS=" --seq-length 4096 \ --max-position-embeddings 4096 \ --micro-batch-size 2 \ - --global-batch-size 16 \ + --global-batch-size 128 \ --make-vocab-size-divisible-by 1 \ --lr 1.5e-4 \ - --train-iters 200 \ + --train-iters 2000 \ 
--lr-decay-style cosine \ --untie-embeddings-and-output-weights \ --disable-bias-linear \ diff --git a/tasks/finetune/tune_llama2_70b_ptd.sh b/examples/llama2/tune_llama2_70b_ptd.sh similarity index 100% rename from tasks/finetune/tune_llama2_70b_ptd.sh rename to examples/llama2/tune_llama2_70b_ptd.sh diff --git a/tasks/finetune/tune_llama2_7b_ptd.sh b/examples/llama2/tune_llama2_7b_ptd.sh similarity index 94% rename from tasks/finetune/tune_llama2_7b_ptd.sh rename to examples/llama2/tune_llama2_7b_ptd.sh index b24b5068b41f263d8fa418bcdb51c1966f4a0980..6fe08e1c95a5872666d82e663f449c76cce27eff 100644 --- a/tasks/finetune/tune_llama2_7b_ptd.sh +++ b/examples/llama2/tune_llama2_7b_ptd.sh @@ -3,8 +3,6 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 export NPU_ASD_ENABLE=0 -source "./envs/ascend-toolkit/set_env.sh" - GPUS_PER_NODE=8 MASTER_ADDR=localhost MASTER_PORT=6001 @@ -45,10 +43,10 @@ GPT_ARGS=" --seq-length 4096 \ --max-position-embeddings 4096 \ --micro-batch-size 4 \ - --global-batch-size 16 \ + --global-batch-size 128 \ --make-vocab-size-divisible-by 1 \ --lr 1.25e-6 \ - --train-iters 200 \ + --train-iters 2000 \ --lr-decay-style cosine \ --untie-embeddings-and-output-weights \ --disable-bias-linear \ diff --git a/examples/mixtral/README.md b/examples/mixtral/README.md index 9582383611daafeb48259ba1efaf003f1b0865fa..7778d9109bc1d2e0b79ec743d70f21a14bfba122 100644 --- a/examples/mixtral/README.md +++ b/examples/mixtral/README.md @@ -82,7 +82,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # 权重格式转换 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_hf \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1 \ @@ -133,7 +133,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # HF 转 tp1-pp8-ep2 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_hf \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1 \ @@ -151,7 +151,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # tp1-pp8-ep2 转 tp1-pp8-ep1 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_mg \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1-ep2-pp8 \ @@ -167,7 +167,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # tp1-pp8-ep2 转 HF - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_mg \ --saver mixtral \ --save-model-type huggingface \ @@ -233,6 +233,7 @@ python ./tools/preprocess_data.py \ Mixtral-8x7B 在双机16卡上(ep2 pp8) **昇腾芯片** 和 **参考芯片** 上的性能对比: *(当节点够多的情况下,ep越大吞吐越大,这里并非为最佳性能,仅供参考)* + | 设备 | 模型 | 迭代数 | 样本吞吐 (samples/step) | tokens吞吐 (tokens/s/p) | 单步迭代时间 (s/step) | | :--: | :-------: | :----: |:-------------------:|:---------------------:|:-------------------: | | NPUs | Mixtral-8x7B | 1000 | 4.11 | 1053.6 | 31.13 | @@ -241,7 +242,7 @@ Mixtral-8x7B 在双机16卡上(ep2 pp8) **昇腾芯片** 和 **参考芯片** ## 模型推理 -首先需要配置推理脚本: ***tasks/inference/generate_mixtral_8x7b_ptd.sh*** +首先需要配置推理脚本: ***examples/mixtral/generate_mixtral_8x7b_ptd.sh*** ```bash # 根据您自己的 ascend-toolkit 路径,执行set_env.sh @@ -264,13 +265,13 @@ PP=1 该文档中使用的Mixtral-8x7B-v0.1模型为L0模型,只有续写能力,推理不带任何模板并且容易出现重复或回答不停止的情况。 如若想要有较好的人机对话能力,请使用Mixtral-8x7B-Instruct-v0.1模型,该模型为指令遵从度训练后需要配合模板使用,基本操作同上,仅启动入口有变化: -torchrun $DISTRIBUTED_ARGS 
tasks/inference/inference_mixtral.py +torchrun $DISTRIBUTED_ARGS inference.py ``` 然后可直接启动 ```bash -bash tasks/inference/generate_mixtral_8x7b_ptd.sh +bash examples/mixtral/generate_mixtral_8x7b_ptd.sh ``` 推理的示例如下: @@ -279,7 +280,7 @@ bash tasks/inference/generate_mixtral_8x7b_ptd.sh ## 模型评估 使用 MMLU数据集评估模型. 数据集下载路径 [这里](https://huggingface.co/datasets/cais/mmlu). -配置评估脚本: tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh +配置评估脚本: examples/mixtral/evaluate_mixtral_8x7b_ptd.sh ```bash # ascend-toolkit 路径 @@ -296,7 +297,7 @@ TASK="mmlu" 启动评估 ```bash -bash tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh +bash examples/mixtral/evaluate_mixtral_8x7b_ptd.sh ``` 评估结果如下 diff --git a/examples/mixtral/README_en.md b/examples/mixtral/README_en.md index 56787af90f02202d10ddcc848130a49e7da6dd69..9c5315d418975a49b5fcf7ab82814b8c3b5446ce 100644 --- a/examples/mixtral/README_en.md +++ b/examples/mixtral/README_en.md @@ -81,7 +81,7 @@ Recommended hardware configuration for inference: source /usr/local/Ascend/ascend-toolkit/set_env.sh # Convert weight format - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_hf \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1 \ @@ -132,7 +132,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # HF to tp1-pp8-ep2 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_hf \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1 \ @@ -150,7 +150,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # tp1-pp8-ep2 to tp1-pp8-ep1 - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_mg \ --saver mixtral \ --load-dir ../Mixtral-8x7B-v0.1-ep2-pp8 \ @@ -166,7 +166,7 @@ python ./tools/preprocess_data.py \ source /usr/local/Ascend/ascend-toolkit/set_env.sh # tp1-pp8-ep2 to HF - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader mixtral_mg \ --saver mixtral \ --save-model-type huggingface \ @@ -240,7 +240,7 @@ Comparison of Mixtral-8x7B performance on 2 nodes and 16 chips with ep2 pp8: ## Model-Inference -First, configure the inference script: ***tasks/inference/generate_mixtral_8x7b_ptd.sh*** +First, configure the inference script: ***examples/mixtral/generate_mixtral_8x7b_ptd.sh*** ```bash # Execute set_env.sh according to your own ascend-toolkit path @@ -263,13 +263,13 @@ PP=1 The Mixtral-8x7B-v0.1 model used in this document is an L0 model, only with continuation ability, inference does not involve any templates and is prone to repetition or non-stop answering. If you want to have better human-machine dialogue capabilities, please use the Mixtral-8x7B-Instruct-v0.1 model. This model requires instruction compliance training and needs to be used with templates. The basic operations are the same as above, only the startup entry has changed: -torchrun $DISTRIBUTED_ARGS tasks/inference/inference_mixtral.py +torchrun $DISTRIBUTED_ARGS inference.py ``` Then you can start it directly ```bash -bash tasks/inference/generate_mixtral_8x7b_ptd.sh +bash examples/mixtral/generate_mixtral_8x7b_ptd.sh ``` An example of inference is as follows: @@ -278,7 +278,7 @@ An example of inference is as follows: ## Model-Evaluation Evaluate the model using the MMLU dataset. Dataset download path [here](https://huggingface.co/datasets/cais/mmlu). 
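To make the template requirement above concrete, the following minimal Python sketch shows how the `[INST] ... [/INST]` dialog template (the same string this change adds to `inference.py` for the 'mixtral' prompt type) wraps a raw user instruction before generation. The `build_prompt` helper and the sample instruction are illustrative only and are not part of the repository.

```python
# Minimal sketch of the Mixtral-Instruct prompt wrapping selected by the new
# 'mixtral' prompt type in inference.py; helper and sample text are illustrative.
MIXTRAL_SYSTEM_TEMPLATE = ""
MIXTRAL_DIALOG_TEMPLATE = "[INST] {instruction} [/INST] "


def build_prompt(instruction: str) -> str:
    """Wrap a raw user instruction in the instruction-following template."""
    return MIXTRAL_SYSTEM_TEMPLATE + MIXTRAL_DIALOG_TEMPLATE.format(instruction=instruction)


print(build_prompt("Give three reasons to visit Hangzhou."))
# -> [INST] Give three reasons to visit Hangzhou. [/INST]
```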
-Configure the evaluation script: ***tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh*** +Configure the evaluation script: ***examples/mixtral/evaluate_mixtral_8x7b_ptd.sh*** ```bash # Ascend-toolkit path @@ -296,7 +296,7 @@ TASK="mmlu" Start the evaluation ```bash -bash tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh +bash examples/mixtral/evaluate_mixtral_8x7b_ptd.sh ``` The evaluation results are as follows diff --git a/tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh b/examples/mixtral/evaluate_mixtral_8x7b_ptd.sh similarity index 96% rename from tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh rename to examples/mixtral/evaluate_mixtral_8x7b_ptd.sh index a9f5c31faf6018c8c092378b900f84aa61a5573a..1e43e25afc1bc3e84a65cdc02ba4e12d856d220e 100644 --- a/tasks/evaluation/evaluate_mixtral_8x7b_ptd.sh +++ b/examples/mixtral/evaluate_mixtral_8x7b_ptd.sh @@ -66,7 +66,7 @@ MOE_ARGS=" --moe-train-capacity-factor 8.0 " -torchrun $DISTRIBUTED_ARGS tasks/evaluation/evaluation_llama.py \ +torchrun $DISTRIBUTED_ARGS evaluation.py \ $GPT_ARGS \ $MOE_ARGS \ --distributed-backend nccl | tee logs/evaluation_mixtral_${TASK}.log diff --git a/tasks/inference/generate_mixtral_8x7b_ptd.sh b/examples/mixtral/generate_mixtral_8x7b_ptd.sh similarity index 95% rename from tasks/inference/generate_mixtral_8x7b_ptd.sh rename to examples/mixtral/generate_mixtral_8x7b_ptd.sh index 03252f0ab7ef825f34ecfd796a2a4b6bd3a954a5..eda91d14ed9daebaab76df25758f48c737f3894a 100644 --- a/tasks/inference/generate_mixtral_8x7b_ptd.sh +++ b/examples/mixtral/generate_mixtral_8x7b_ptd.sh @@ -62,8 +62,9 @@ MOE_ARGS=" --moe-train-capacity-factor 8.0 " -torchrun $DISTRIBUTED_ARGS tasks/inference/inference_llama.py \ +torchrun $DISTRIBUTED_ARGS inference.py \ $GPT_ARGS \ $MOE_ARGS \ --distributed-backend nccl \ + --inference_prompt_type 'mixtral' \ | tee logs/generate_mixtral.log diff --git a/examples/qwen/README.md b/examples/qwen/README.md index 57f26d94fd220d44b88ce6090c7b1d00fa50962a..c14415bc48c468b0146d2f8cafb8f40696221d36 100644 --- a/examples/qwen/README.md +++ b/examples/qwen/README.md @@ -118,7 +118,7 @@ Qwen-7B 训练的硬件配置: # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -135,7 +135,7 @@ Qwen-7B 训练的硬件配置: cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_qwen \ @@ -203,7 +203,7 @@ Qwen-7B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -配置 qwen-7b 推理脚本:tasks/inference/generate_qwen_7b_ptd.sh +配置 qwen-7b 推理脚本:examples/qwen/generate_qwen_7b_ptd.sh ```bash # ascend-toolkit 路径 @@ -216,7 +216,7 @@ TOKENIZER_PATH="your tokenizer directory path" 启动qwen-7b推理脚本 ```bash -bash tasks/inference/generate_qwen_7b_ptd.sh +bash examples/qwen/generate_qwen_7b_ptd.sh ``` 推理示例如下: @@ -227,7 +227,7 @@ bash tasks/inference/generate_qwen_7b_ptd.sh 使用[CEval数据集](https://huggingface.co/datasets/ceval/ceval-exam)和[MMLU数据集](https://huggingface.co/datasets/cais/mmlu)评估模型. 
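The Qwen weight-conversion commands above can also be driven from Python, mirroring the way the updated unit tests in this change invoke the renamed `tools/checkpoint/convert_ckpt.py`. This is only a sketch: the loader/saver flags are taken from the README hunk above, while the directory paths are placeholders and further arguments (for example `--tokenizer-model`) may be required for a real conversion.

```python
# Sketch: invoke the renamed converter the same way the updated tests do.
# Paths are placeholders; run from the ModelLink repository root.
import subprocess

convert_args = [
    "--model-type", "GPT",
    "--loader", "qwen_hf",
    "--saver", "megatron",
    "--target-tensor-parallel-size", "8",
    "--load-dir", "./qwen-7b-hf",        # placeholder: HuggingFace checkpoint dir
    "--save-dir", "./qwen-7b-megatron",  # placeholder: converted checkpoint dir
]

subprocess.run(["python", "tools/checkpoint/convert_ckpt.py"] + convert_args, check=True)
```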
-配置qwen-7b评估脚本: tasks/evaluation/evaluate_qwen_7b_ptd.sh +配置qwen-7b评估脚本: examples/qwen/evaluate_qwen_7b_ptd.sh ```bash # ascend-toolkit 路径 @@ -245,7 +245,7 @@ TASK="mmlu" # ceval任务配置为 "ceval" 启动评估 ```bash -bash tasks/evaluation/evaluate_qwen_7b_ptd.sh +bash examples/qwen/evaluate_qwen_7b_ptd.sh ``` | 数据集 | 总学科数 | 总问题数 | 参考准确率 | NPU准确率 | @@ -348,7 +348,7 @@ Qwen-14B 训练的硬件配置: # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -363,7 +363,7 @@ Qwen-14B 训练的硬件配置: cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_qwen \ @@ -430,7 +430,7 @@ Qwen-14B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -配置 qwen-14b 推理脚本:tasks/inference/generate_qwen_14b_ptd.sh +配置 qwen-14b 推理脚本:examples/qwen/generate_qwen_14b_ptd.sh ```bash # ascend-toolkit 路径 @@ -443,7 +443,7 @@ TOKENIZER_PATH=./qwen-14b-hf 启动qwen-14b推理脚本 ```bash -bash tasks/inference/generate_qwen_14b_ptd.sh +bash examples/qwen/generate_qwen_14b_ptd.sh ``` 推理示例如下: @@ -454,7 +454,7 @@ bash tasks/inference/generate_qwen_14b_ptd.sh 使用[CEval数据集](https://huggingface.co/datasets/ceval/ceval-exam)和[MMLU数据集](https://huggingface.co/datasets/cais/mmlu)评估模型. -配置qwen-14b评估脚本: tasks/evaluation/evaluate_qwen_14b_ptd.sh +配置qwen-14b评估脚本: examples/qwen/evaluate_qwen_14b_ptd.sh ```bash # ascend-toolkit 路径 @@ -471,7 +471,7 @@ TASK="mmlu" # ceval任务配置为 "ceval" 启动评估 ```bash -bash tasks/evaluation/evaluate_qwen_14b_ptd.sh +bash examples/qwen/evaluate_qwen_14b_ptd.sh ``` | 数据集 | 总学科数 | 总问题数 | 参考准确率 | NPU准确率 | @@ -557,7 +557,7 @@ Qwen-72B 训练的硬件配置: # 修改 ascend-toolkit 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -573,7 +573,7 @@ Qwen-72B 训练的硬件配置: cd ModelLink/ # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_qwen \ @@ -650,7 +650,7 @@ Qwen-72B 在 **昇腾芯片** 和 **参考芯片** 上的性能对比: ## 推理 -配置 qwen-72b 推理脚本:tasks/inference/generate_qwen_72b_ptd.sh +配置 qwen-72b 推理脚本:examples/qwen/generate_qwen_72b_ptd.sh ```bash # ascend-toolkit 路径 @@ -663,7 +663,7 @@ TOKENIZER_PATH=./qwen-72b-hf 启动qwen-72b推理脚本 ```bash -bash tasks/inference/generate_qwen_72b_ptd.sh +bash examples/qwen/generate_qwen_72b_ptd.sh ``` 推理示例如下: @@ -674,7 +674,7 @@ bash tasks/inference/generate_qwen_72b_ptd.sh 使用[CEval数据集](https://huggingface.co/datasets/ceval/ceval-exam)和[MMLU数据集](https://huggingface.co/datasets/cais/mmlu)评估模型. 
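The evaluation tables in these README sections report one accuracy per dataset across all subjects and questions. As a purely illustrative aid, the sketch below (with made-up numbers) shows how per-subject counts roll up into that overall accuracy; the guarded division mirrors the intent of the `check_divisible_by_zero` helper imported throughout the relocated evaluation code.

```python
# Hypothetical per-subject results: subject -> (correct answers, total questions).
subject_results = {
    "abstract_algebra": (6, 10),
    "anatomy": (8, 10),
    "astronomy": (7, 10),
}


def safe_div(numerator: float, denominator: float) -> float:
    """Guarded division, in the spirit of modellink's check_divisible_by_zero."""
    if denominator == 0:
        raise ZeroDivisionError("total question count must be non-zero")
    return numerator / denominator


total_correct = sum(correct for correct, _ in subject_results.values())
total_questions = sum(total for _, total in subject_results.values())

print(f"subjects: {len(subject_results)}")                                   # 3
print(f"questions: {total_questions}")                                       # 30
print(f"overall accuracy: {safe_div(total_correct, total_questions):.3f}")   # 0.700
```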
-配置qwen-72b评估脚本: tasks/evaluation/evaluate_qwen_72b_ptd.sh +配置qwen-72b评估脚本: examples/qwen/evaluate_qwen_72b_ptd.sh ```bash # ascend-toolkit 路径 @@ -692,7 +692,7 @@ TASK="mmlu" # ceval任务配置为 "ceval" 启动评估 ```bash -bash tasks/evaluation/evaluate_qwen_72b_ptd.sh +bash examples/qwen/evaluate_qwen_72b_ptd.sh ``` | 数据集 | 总学科数 | 总问题数 | 参考准确率 | NPU准确率 | diff --git a/examples/qwen/README_en.md b/examples/qwen/README_en.md index 90fe6351d43407fce47987b4dd36c279b70b3ea0..9d0de0d06557e0c9f32c34798302df39a284b4c3 100644 --- a/examples/qwen/README_en.md +++ b/examples/qwen/README_en.md @@ -120,7 +120,7 @@ Here's a hardware summary of pre-training Qwen-7B: # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -136,7 +136,7 @@ Here's a hardware summary of pre-training Qwen-7B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -198,7 +198,7 @@ The performance of Qwen-7B in **Ascend NPU** and **Reference**: | Reference | Qwen-7B | 2867 | ## Inference -Config qwen-7b inference script: tasks/inference/generate_qwen_7b_ptd.sh +Config qwen-7b inference script: examples/qwen/generate_qwen_7b_ptd.sh ```bash # ascend-toolkit path @@ -209,9 +209,9 @@ CHECKPOINT="your model directory path" TOKENIZER_PATH="your tokenizer directory path" ``` -Launch qwen-7b inference script: tasks/inference/generate_qwen_7b_ptd.sh +Launch qwen-7b inference script: examples/qwen/generate_qwen_7b_ptd.sh ```bash -bash tasks/inference/generate_qwen_7b_ptd.sh +bash examples/qwen/generate_qwen_7b_ptd.sh ``` Some inference samples are as follows: @@ -221,7 +221,7 @@ Some inference samples are as follows: ## Evaluation We use the [CEval benchmark](https://huggingface.co/datasets/ceval/ceval-exam) and [MMLU benchmark](https://huggingface.co/datasets/cais/mmlu) to evaluate our model. 
-Config qwen-7b evaluation script: tasks/evaluation/evaluate_qwen_7b_ptd.sh +Config qwen-7b evaluation script: examples/qwen/evaluate_qwen_7b_ptd.sh ```bash # ascend-toolkit path @@ -239,7 +239,7 @@ TASK="mmlu" # "ceval" for ceval task Launch qwen-7b evaluation ```bash -bash ./tasks/evaluation/evaluate_qwen_7b_ptd.sh +bash examples/qwen/evaluate_qwen_7b_ptd.sh ``` | Task | Subset | Question | OpenSource | NPU | @@ -345,7 +345,7 @@ Here's a hardware summary of pre-training Qwen-14B: # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -360,7 +360,7 @@ Here's a hardware summary of pre-training Qwen-14B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -423,7 +423,7 @@ The performance of Qwen-14B in **Ascend NPU** and **Reference**: ## Inference -Config qwen-14b inference script: tasks/inference/generate_qwen_14b_ptd.sh +Config qwen-14b inference script: examples/qwen/generate_qwen_14b_ptd.sh ```bash # ascend-toolkit path @@ -434,9 +434,9 @@ CHECKPOINT="your model directory path" TOKENIZER_PATH=./qwen-14b-hf ``` -Launch qwen-14b inference script: tasks/inference/generate_qwen_14b_ptd.sh +Launch qwen-14b inference script: examples/qwen/generate_qwen_14b_ptd.sh ```bash -bash tasks/inference/generate_qwen_7b_ptd.sh +bash examples/qwen/generate_qwen_14b_ptd.sh ``` Some inference samples are as follows: @@ -447,7 +447,7 @@ Some inference samples are as follows: We use the [CEval benchmark](https://huggingface.co/datasets/ceval/ceval-exam) and [MMLU benchmark](https://huggingface.co/datasets/cais/mmlu) to evaluate our model. 
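For orientation, the sketch below assembles the same kind of distributed launch that the relocated `examples/qwen/evaluate_*.sh` scripts issue for the new top-level `evaluation.py` entry point. Every value here is a placeholder for illustration (single node, eight devices, an assumed local MMLU path); the authoritative argument list, including the model configuration, lives in the shell scripts referenced in these README sections.

```python
# Sketch of the torchrun-based evaluation launch; all values are placeholders.
import subprocess

distributed_args = [
    "--nproc_per_node", "8",
    "--nnodes", "1",
    "--node_rank", "0",
    "--master_addr", "localhost",
    "--master_port", "6001",
]

eval_args = [
    "--task-data-path", "./mmlu/data/test",  # placeholder dataset location
    "--task", "mmlu",
    "--max-new-tokens", "2",                 # task dependent, see the README note above
]

subprocess.run(["torchrun"] + distributed_args + ["evaluation.py"] + eval_args, check=True)
```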
-Config qwen-14b evaluation script: tasks/evaluation/evaluate_qwen_14b_ptd.sh +Config qwen-14b evaluation script: examples/qwen/evaluate_qwen_14b_ptd.sh ```bash # ascend-toolkit path @@ -465,7 +465,7 @@ TASK="mmlu" # "ceval" for ceval task Launch qwen-14b evaluation ```bash -bash ./tasks/evaluation/evaluate_qwen_14b_ptd.sh +bash examples/qwen/evaluate_qwen_14b_ptd.sh ``` | Task | Subset | Question | OpenSource | NPU | @@ -551,7 +551,7 @@ Here's a hardware summary of pre-training Qwen-72B: # modify the script according to your own ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader qwen_hf \ --saver megatron \ --target-tensor-parallel-size 8 \ @@ -567,7 +567,7 @@ Here's a hardware summary of pre-training Qwen-72B: cd ModelLink/ # Modify the ascend-toolkit path source /usr/local/Ascend/ascend-toolkit/set_env.sh - python tools/checkpoint/util.py --model-type GPT \ + python tools/checkpoint/convert_ckpt.py --model-type GPT \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ @@ -640,7 +640,7 @@ The performance of Qwen-72B in **Ascend NPU** and **Reference**: ## Inference -Config qwen-72b inference script: tasks/inference/generate_qwen_72b_ptd.sh +Config qwen-72b inference script: examples/qwen/generate_qwen_72b_ptd.sh ```bash # ascend-toolkit path @@ -651,9 +651,9 @@ CHECKPOINT="your model directory path" TOKENIZER_PATH=./qwen-72b-hf ``` -Launch qwen-72b inference script: tasks/inference/generate_qwen_72b_ptd.sh +Launch qwen-72b inference script: examples/qwen/generate_qwen_72b_ptd.sh ```bash -bash tasks/inference/generate_qwen_72b_ptd.sh +bash examples/qwen/generate_qwen_72b_ptd.sh ``` Some inference samples are as follows: @@ -663,7 +663,7 @@ Some inference samples are as follows: ## Evaluation We use the [CEval benchmark](https://huggingface.co/datasets/ceval/ceval-exam) and [MMLU benchmark](https://huggingface.co/datasets/cais/mmlu) to evaluate our model. -Config qwen-72b evaluation script: tasks/evaluation/evaluate_qwen_72b_ptd.sh +Config qwen-72b evaluation script: examples/qwen/evaluate_qwen_72b_ptd.sh ```bash # ascend-toolkit path @@ -681,7 +681,7 @@ TASK="mmlu" # "ceval" for ceval task Launch qwen-72b evaluation ```bash -bash ./tasks/evaluation/evaluate_qwen_72b_ptd.sh +bash examples/qwen/evaluate_qwen_72b_ptd.sh ``` | Task | Subset | Question | OpenSource | NPU | diff --git a/tasks/evaluation/evaluate_qwen_14b_ptd.sh b/examples/qwen/evaluate_qwen_14b_ptd.sh similarity index 96% rename from tasks/evaluation/evaluate_qwen_14b_ptd.sh rename to examples/qwen/evaluate_qwen_14b_ptd.sh index 1a7cce307aa7fe0acfdac0b2c800ebd5114c7420..decf4062382f0d8dbb6e458c7fd8d21906962d61 100644 --- a/tasks/evaluation/evaluate_qwen_14b_ptd.sh +++ b/examples/qwen/evaluate_qwen_14b_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./mmlu/data/test" TASK="mmlu" # Different task needs different max_new_tokens value, please follow the instruction in readme. 
-torchrun $DISTRIBUTED_ARGS ./tasks/evaluation/evaluation_llama.py \ +torchrun $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 2048 \ diff --git a/tasks/evaluation/evaluate_qwen_72b_ptd.sh b/examples/qwen/evaluate_qwen_72b_ptd.sh similarity index 96% rename from tasks/evaluation/evaluate_qwen_72b_ptd.sh rename to examples/qwen/evaluate_qwen_72b_ptd.sh index d0a3128b6805d1eb4ae7442586855fe309f38420..9704dfa8198ef293ef0a2eb713aaef523cbd766b 100644 --- a/tasks/evaluation/evaluate_qwen_72b_ptd.sh +++ b/examples/qwen/evaluate_qwen_72b_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./mmlu/data/test" TASK="mmlu" # Different task needs different max_new_tokens value, please follow the instruction in readme. -torchrun $DISTRIBUTED_ARGS ./tasks/evaluation/evaluation_llama.py \ +torchrun $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task $TASK\ --seq-length 8192 \ diff --git a/tasks/evaluation/evaluate_qwen_7b_ptd.sh b/examples/qwen/evaluate_qwen_7b_ptd.sh similarity index 96% rename from tasks/evaluation/evaluate_qwen_7b_ptd.sh rename to examples/qwen/evaluate_qwen_7b_ptd.sh index 77497e8eea7047e4dbaa4762a5392b0dd5929a7d..6ff7aa0357614f4c0317a7e73bd2957e23f4021a 100644 --- a/tasks/evaluation/evaluate_qwen_7b_ptd.sh +++ b/examples/qwen/evaluate_qwen_7b_ptd.sh @@ -23,7 +23,7 @@ DATA_PATH="./mmlu/data/test" TASK="mmlu" # Different task needs different max_new_tokens value, please follow the instruction in readme. -torchrun $DISTRIBUTED_ARGS ./tasks/evaluation/evaluation_llama.py \ +torchrun $DISTRIBUTED_ARGS evaluation.py \ --task-data-path $DATA_PATH \ --task ${TASK}\ --seq-length 8192 \ diff --git a/tasks/inference/generate_qwen_14b_ptd.sh b/examples/qwen/generate_qwen_14b_ptd.sh similarity index 96% rename from tasks/inference/generate_qwen_14b_ptd.sh rename to examples/qwen/generate_qwen_14b_ptd.sh index 8ad2da74c57df93c052f707aa35f97073e927e2b..d06fa3a93307db3cc2f5dfbc82e49d2fa7b93632 100644 --- a/tasks/inference/generate_qwen_14b_ptd.sh +++ b/examples/qwen/generate_qwen_14b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -torchrun $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +torchrun $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 40 \ diff --git a/tasks/inference/generate_qwen_72b_ptd.sh b/examples/qwen/generate_qwen_72b_ptd.sh similarity index 96% rename from tasks/inference/generate_qwen_72b_ptd.sh rename to examples/qwen/generate_qwen_72b_ptd.sh index 678f42090178f09aad1f70bb778b20242f02dcde..03763108814627332ed8ffe3cc8b802f4ca87fc0 100644 --- a/tasks/inference/generate_qwen_72b_ptd.sh +++ b/examples/qwen/generate_qwen_72b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -torchrun $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +torchrun $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 80 \ diff --git a/tasks/inference/generate_qwen_7b_ptd.sh b/examples/qwen/generate_qwen_7b_ptd.sh similarity index 96% rename from tasks/inference/generate_qwen_7b_ptd.sh rename to examples/qwen/generate_qwen_7b_ptd.sh index 
54c0e8776e8e267748d5a8a5b469a9b2c12509d0..1f067c21abcc6e2f27ca3545703bcb30a32ced70 100644 --- a/tasks/inference/generate_qwen_7b_ptd.sh +++ b/examples/qwen/generate_qwen_7b_ptd.sh @@ -20,7 +20,7 @@ WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) DISTRIBUTED_ARGS="--nproc_per_node $NPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" -torchrun $DISTRIBUTED_ARGS ./tasks/inference/inference_llama.py \ +torchrun $DISTRIBUTED_ARGS inference.py \ --tensor-model-parallel-size 8 \ --pipeline-model-parallel-size 1 \ --num-layers 32 \ diff --git a/tasks/inference/inference_alpaca.py b/inference.py similarity index 68% rename from tasks/inference/inference_alpaca.py rename to inference.py index c44c46e5973605bb2f5864c739df8ec9b71ed643..3f0da94ba56e895cff569f2066bc0d38fc6c67f9 100644 --- a/tasks/inference/inference_alpaca.py +++ b/inference.py @@ -20,7 +20,7 @@ from megatron import get_args from megatron.model import GPTModel from megatron.initialize import initialize_megatron from megatron.arguments import core_transformer_config_from_args -from tasks.inference.text_generation.infer_base import task_factory, add_text_generate_args +from modellink.tasks.inference.text_generation.infer_base import task_factory, add_text_generate_args def model_provider(pre_process=True, post_process=True): @@ -47,10 +47,17 @@ if __name__ == "__main__": pretrained_model_name_or_path=args.load ) - system_template = "Below is an instruction that describes a task, paired with an input that provides further " \ - "context. Write a response that appropriately completes the request. " \ - "Please note that you need to think through your response logically and step by step.\n\n" - dialog_template = "### Instruction:\n{instruction}\n\n### Response:" - template = system_template + dialog_template + system_template = "" + dialog_template = "{instruction}" + + if args.inference_prompt_type == 'alpaca': + system_template = "Below is an instruction that describes a task, paired with an input that provides further " \ + "context. Write a response that appropriately completes the request. 
" \ + "Please note that you need to think through your response logically and step by step.\n\n" + dialog_template = "### Instruction:\n{instruction}\n\n### Response:" + + elif args.inference_prompt_type == 'mixtral': + system_template = "" + dialog_template = "[INST] {instruction} [/INST] " task_factory(args, model, system_template=system_template, dialog_template=dialog_template) diff --git a/modellink/checkpointing.py b/modellink/checkpointing.py index 74cda6e0aa19bf72b5662cc84bfe3795931e843e..b8be1adcfa6c33e989ef55c10bfc78ed348ce588 100644 --- a/modellink/checkpointing.py +++ b/modellink/checkpointing.py @@ -18,7 +18,7 @@ import os from functools import wraps from megatron.checkpointing import _load_base_checkpoint from megatron import get_args -from tasks.finetune.lora.utils import is_enable_lora, merge_dicts, modify_keys_with_dict +from .tasks.finetune.lora.utils import is_enable_lora, merge_dicts, modify_keys_with_dict def _load_base_checkpoint_wrapper(fn): diff --git a/modellink/model/gpt_model.py b/modellink/model/gpt_model.py index 733e10e56fef486a09656d2fe271c0d619987ffd..2537722ebc0d3b85d5d3b3e396c2559450dc039c 100644 --- a/modellink/model/gpt_model.py +++ b/modellink/model/gpt_model.py @@ -18,7 +18,7 @@ from megatron.model.module import MegatronModule from megatron.model.enums import AttnMaskType from megatron.model.language_model import get_language_model from megatron.model.gpt_model import post_language_model_processing -from tasks.inference.text_generation import MegatronModuleForCausalLM +from ..tasks.inference.text_generation import MegatronModuleForCausalLM class GPTModel(MegatronModule, MegatronModuleForCausalLM): diff --git a/modellink/model/transformer.py b/modellink/model/transformer.py index f81e447a670672f58fe3e05dd06f2ec6153a095a..87ff0d20d7fca1ce1d47e23dcb307a47442a0d14 100644 --- a/modellink/model/transformer.py +++ b/modellink/model/transformer.py @@ -34,7 +34,7 @@ from megatron.model.utils import get_norm from modellink.error_utils import ensure_valid from modellink.model.alibi import Alibi, _build_alibi_tensor, _get_inverted_mask -from tasks.finetune.lora.utils import is_enable_lora +from ..tasks.finetune.lora.utils import is_enable_lora try: from einops import rearrange diff --git a/modellink/tasks/__init__.py b/modellink/tasks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1307aeca2e4639a1e6807acda4a07f1b9856eb --- /dev/null +++ b/modellink/tasks/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
\ No newline at end of file diff --git a/tasks/evaluation/eval_impl/template.py b/modellink/tasks/evaluation/__init__.py similarity index 55% rename from tasks/evaluation/eval_impl/template.py rename to modellink/tasks/evaluation/__init__.py index 75f77cf3a603c55ffe061aa6666e72516948c69b..aaf493892e8d5b95236d1458d1bab68ba11135cc 100644 --- a/tasks/evaluation/eval_impl/template.py +++ b/modellink/tasks/evaluation/__init__.py @@ -12,10 +12,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -GSM8K_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json' -MMLU_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json' -CEVAL_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json' -AGIEVAL_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json' -BBH_TEMPLATE_DIR = 'tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson' -CODE_TEST_LOG_DIR = 'tasks/evaluation/codecheck_log' diff --git a/modellink/tasks/evaluation/eval_api/__init__.py b/modellink/tasks/evaluation/eval_api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1307aeca2e4639a1e6807acda4a07f1b9856eb --- /dev/null +++ b/modellink/tasks/evaluation/eval_api/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/tasks/evaluation/eval_api/chat.py b/modellink/tasks/evaluation/eval_api/chat.py similarity index 100% rename from tasks/evaluation/eval_api/chat.py rename to modellink/tasks/evaluation/eval_api/chat.py diff --git a/tasks/evaluation/eval_api/dataset_eval.py b/modellink/tasks/evaluation/eval_api/dataset_eval.py similarity index 96% rename from tasks/evaluation/eval_api/dataset_eval.py rename to modellink/tasks/evaluation/eval_api/dataset_eval.py index 02efe6fb6825f405e43866a7d92e1a7742253f92..bc5e42e7abf69956c7b38ff5c617f6352164c419 100644 --- a/tasks/evaluation/eval_api/dataset_eval.py +++ b/modellink/tasks/evaluation/eval_api/dataset_eval.py @@ -14,7 +14,7 @@ # limitations under the License. from abc import abstractmethod, ABCMeta -from tasks.evaluation.eval_api.chat import Chat +from .chat import Chat class DatasetEval(metaclass=ABCMeta): diff --git a/modellink/tasks/evaluation/eval_impl/__init__.py b/modellink/tasks/evaluation/eval_impl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1307aeca2e4639a1e6807acda4a07f1b9856eb --- /dev/null +++ b/modellink/tasks/evaluation/eval_impl/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/tasks/evaluation/eval_impl/agi_eval.py b/modellink/tasks/evaluation/eval_impl/agi_eval.py similarity index 95% rename from tasks/evaluation/eval_impl/agi_eval.py rename to modellink/tasks/evaluation/eval_impl/agi_eval.py index 55ab368162c62de546898f02810e15ff7fa938d9..9d8c3f30972bc09a8161cc177ff3017cbdcdf9ae 100644 --- a/tasks/evaluation/eval_impl/agi_eval.py +++ b/modellink/tasks/evaluation/eval_impl/agi_eval.py @@ -17,10 +17,10 @@ import logging import json import pandas as pd import tqdm -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import AGIEVAL_TEMPLATE_DIR -from modellink.error_utils import check_divisible_by_zero +from .template import AGIEVAL_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/bbh_eval.py b/modellink/tasks/evaluation/eval_impl/bbh_eval.py similarity index 94% rename from tasks/evaluation/eval_impl/bbh_eval.py rename to modellink/tasks/evaluation/eval_impl/bbh_eval.py index 761346d3d1b9b6bcbceb9b4d8fe406dbdce6b0d4..9ab8eefd60d81ef937d8f2746e584c6c95c3948a 100644 --- a/tasks/evaluation/eval_impl/bbh_eval.py +++ b/modellink/tasks/evaluation/eval_impl/bbh_eval.py @@ -17,10 +17,11 @@ import logging import json import pandas as pd import tqdm -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import BBH_TEMPLATE_DIR -from modellink.error_utils import check_divisible_by_zero + +from .template import BBH_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/boolq_eval.py b/modellink/tasks/evaluation/eval_impl/boolq_eval.py similarity index 95% rename from tasks/evaluation/eval_impl/boolq_eval.py rename to modellink/tasks/evaluation/eval_impl/boolq_eval.py index ecc65cf11a9fe7ef07184dab3cbf0da3f2fc8862..c81fd59909a7c6b85ef1249ba940dff5d6307ac8 100644 --- a/tasks/evaluation/eval_impl/boolq_eval.py +++ b/modellink/tasks/evaluation/eval_impl/boolq_eval.py @@ -19,9 +19,10 @@ import json import pandas as pd import tqdm -from modellink.error_utils import check_divisible_by_zero -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero + logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/ceval_exam.py b/modellink/tasks/evaluation/eval_impl/ceval_exam.py similarity index 94% rename from tasks/evaluation/eval_impl/ceval_exam.py rename to modellink/tasks/evaluation/eval_impl/ceval_exam.py index 
2fb0902d940eb205c333c8e2bf14ce74d1ce3a72..2c3a2821bb5372a9355c50ee229d7bbcbccaae34 100644 --- a/tasks/evaluation/eval_impl/ceval_exam.py +++ b/modellink/tasks/evaluation/eval_impl/ceval_exam.py @@ -18,10 +18,11 @@ import json import pandas as pd import tqdm -from modellink.error_utils import check_divisible_by_zero -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import CEVAL_TEMPLATE_DIR +from .template import CEVAL_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero + logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json b/modellink/tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json rename to modellink/tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json diff --git a/tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson b/modellink/tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson rename to modellink/tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson diff --git a/tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json b/modellink/tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json rename to modellink/tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json diff --git a/tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json b/modellink/tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json rename to modellink/tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json diff --git a/tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json b/modellink/tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json similarity index 100% rename from tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json rename to modellink/tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json diff --git a/tasks/evaluation/eval_impl/gsm8k_eval.py b/modellink/tasks/evaluation/eval_impl/gsm8k_eval.py similarity index 95% rename from tasks/evaluation/eval_impl/gsm8k_eval.py rename to modellink/tasks/evaluation/eval_impl/gsm8k_eval.py index 5f7e9e46b56bb4707c2ae9d38649d170ad461f43..1a9d825c52a5c9b3d9b72c826cf7a04fdd77ad3c 100644 --- a/tasks/evaluation/eval_impl/gsm8k_eval.py +++ b/modellink/tasks/evaluation/eval_impl/gsm8k_eval.py @@ -19,10 +19,11 @@ import logging import json import pandas as pd import tqdm -from modellink.error_utils import check_divisible_by_zero -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import GSM8K_TEMPLATE_DIR + +from .template import GSM8K_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/human_eval.py b/modellink/tasks/evaluation/eval_impl/human_eval.py similarity index 94% rename from 
tasks/evaluation/eval_impl/human_eval.py rename to modellink/tasks/evaluation/eval_impl/human_eval.py index 268ffd9cd2066a439777d03a9cc1bab62ea9de11..f86054f678e5dbb9664138cfd46a43f3e047cfc5 100644 --- a/tasks/evaluation/eval_impl/human_eval.py +++ b/modellink/tasks/evaluation/eval_impl/human_eval.py @@ -16,17 +16,17 @@ import json import os import logging -import stat import re import sys import subprocess from typing import Iterable, Dict import pandas as pd -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import CODE_TEST_LOG_DIR -from modellink.error_utils import check_divisible_by_zero -from modellink.utils import WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES + +from .template import CODE_TEST_LOG_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero +from ....utils import WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES logger = logging.getLogger(__name__) diff --git a/tasks/evaluation/eval_impl/mmlu_eval.py b/modellink/tasks/evaluation/eval_impl/mmlu_eval.py similarity index 95% rename from tasks/evaluation/eval_impl/mmlu_eval.py rename to modellink/tasks/evaluation/eval_impl/mmlu_eval.py index a4d00cb9bcb4ba70bfd3bdb1bddcb9c09ac3c0db..aa2d85ab5ab28a88e6f4e0562391771000ee9e5a 100644 --- a/tasks/evaluation/eval_impl/mmlu_eval.py +++ b/modellink/tasks/evaluation/eval_impl/mmlu_eval.py @@ -20,10 +20,10 @@ import json import pandas as pd import tqdm -from modellink.error_utils import check_divisible_by_zero -from tasks.evaluation.eval_api.dataset_eval import DatasetEval -from tasks.evaluation.eval_api.chat import Chat -from tasks.evaluation.eval_impl.template import MMLU_TEMPLATE_DIR +from .template import MMLU_TEMPLATE_DIR +from ..eval_api.dataset_eval import DatasetEval +from ..eval_api.chat import Chat +from ....error_utils import check_divisible_by_zero logger = logging.getLogger(__name__) diff --git a/modellink/tasks/evaluation/eval_impl/template.py b/modellink/tasks/evaluation/eval_impl/template.py new file mode 100644 index 0000000000000000000000000000000000000000..488fcb017b0cae041f849a65e803329b5f678758 --- /dev/null +++ b/modellink/tasks/evaluation/eval_impl/template.py @@ -0,0 +1,21 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +GSM8K_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/gsm8k_3shot_template.json' +MMLU_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/mmlu_5shot_template.json' +CEVAL_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/ceval_5shot_template.json' +AGIEVAL_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/AGI_fewshot.json' +BBH_TEMPLATE_DIR = 'modellink/tasks/evaluation/eval_impl/fewshot_template/bbh_templatejson' +CODE_TEST_LOG_DIR = 'modellink/tasks/evaluation/codecheck_log' diff --git a/modellink/tasks/evaluation/utils.py b/modellink/tasks/evaluation/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f2a147659a11625ce59c7f3d19f6dce83805c814 --- /dev/null +++ b/modellink/tasks/evaluation/utils.py @@ -0,0 +1,37 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def add_text_generate_args(parser): + group = parser.add_argument_group(title='text generation') + group.add_argument("--task-data-path", + nargs='*', + default=[], + help='Path to the training dataset. Accepted format:' + '1) a single data path, 2) multiple datasets in the' + 'form: dataset1-path dataset2-path ...') + group.add_argument("--temperature", type=float, default=0.5, + help='Sampling temperature.') + group.add_argument("--evaluation-batch-size", type=int, default=1, + help='Size of evaluation batch') + group.add_argument("--greedy", action='store_true', default=False, + help='Use greedy sampling.') + group.add_argument("--top-p", type=float, default=0.9, + help='Top p sampling.') + group.add_argument("--top-k", type=int, default=0, + help='Top k sampling.') + group.add_argument("--max-new-tokens", type=int, default=128, + help='Size of the output generated text.') + group.add_argument("--task", nargs='*', default=[], help='Choose one task from mmlu, boolq and gsm8k') + return parser diff --git a/tasks/__init__.py b/modellink/tasks/finetune/__init__.py similarity index 100% rename from tasks/__init__.py rename to modellink/tasks/finetune/__init__.py diff --git a/tasks/evaluation/__init__.py b/modellink/tasks/finetune/lora/__init__.py similarity index 100% rename from tasks/evaluation/__init__.py rename to modellink/tasks/finetune/lora/__init__.py diff --git a/tasks/finetune/lora/utils.py b/modellink/tasks/finetune/lora/utils.py similarity index 100% rename from tasks/finetune/lora/utils.py rename to modellink/tasks/finetune/lora/utils.py diff --git a/modellink/tasks/inference/__init__.py b/modellink/tasks/inference/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1307aeca2e4639a1e6807acda4a07f1b9856eb --- /dev/null +++ b/modellink/tasks/inference/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/tasks/inference/text_generation/__init__.py b/modellink/tasks/inference/text_generation/__init__.py similarity index 100% rename from tasks/inference/text_generation/__init__.py rename to modellink/tasks/inference/text_generation/__init__.py diff --git a/tasks/inference/text_generation/beam_utils.py b/modellink/tasks/inference/text_generation/beam_utils.py similarity index 100% rename from tasks/inference/text_generation/beam_utils.py rename to modellink/tasks/inference/text_generation/beam_utils.py diff --git a/tasks/inference/text_generation/communication.py b/modellink/tasks/inference/text_generation/communication.py similarity index 99% rename from tasks/inference/text_generation/communication.py rename to modellink/tasks/inference/text_generation/communication.py index f94a5a0ef8ce84a149a0866f248c18dba282fa64..382d8d7d551210cac73108fb5b83115be6ecdb5d 100644 --- a/tasks/inference/text_generation/communication.py +++ b/modellink/tasks/inference/text_generation/communication.py @@ -19,7 +19,7 @@ import torch from megatron.core import parallel_state -from modellink.error_utils import ensure_var_is_not_none +from ....error_utils import ensure_var_is_not_none def recv_from_prev_pipeline_rank_(recv_buffer=None): diff --git a/tasks/inference/text_generation/forward_step.py b/modellink/tasks/inference/text_generation/forward_step.py similarity index 98% rename from tasks/inference/text_generation/forward_step.py rename to modellink/tasks/inference/text_generation/forward_step.py index 923eb4e61aa018626b883c4c923e4258fc7671fd..eb8d0c5df6c21ab7c2e263374a67db180ab618b6 100644 --- a/tasks/inference/text_generation/forward_step.py +++ b/modellink/tasks/inference/text_generation/forward_step.py @@ -21,8 +21,9 @@ import torch from megatron import get_args from megatron.core import parallel_state -from modellink.error_utils import check_equal -from tasks.inference.text_generation.utils import forward_step as _forward_step_helper + +from .utils import forward_step as _forward_step_helper +from ....error_utils import check_equal class InferenceParams: diff --git a/tasks/inference/text_generation/generation.py b/modellink/tasks/inference/text_generation/generation.py similarity index 99% rename from tasks/inference/text_generation/generation.py rename to modellink/tasks/inference/text_generation/generation.py index e4e4971613b53e7181524b92cee44eab9b418b59..752022c81582e29a4185c3be5213bb60c84e5ffe 100644 --- a/tasks/inference/text_generation/generation.py +++ b/modellink/tasks/inference/text_generation/generation.py @@ -20,8 +20,8 @@ import torch.nn.functional as F from megatron import get_args from megatron.core import parallel_state -from tasks.inference.text_generation.utils import pad_batch, top_k_logits +from .utils import pad_batch, top_k_logits from .forward_step import ForwardStep from .beam_utils import BeamHypotheses from .communication import broadcast_from_last_pipeline_stage diff --git a/tasks/inference/text_generation/infer_base.py b/modellink/tasks/inference/text_generation/infer_base.py similarity index 98% rename from 
tasks/inference/text_generation/infer_base.py rename to modellink/tasks/inference/text_generation/infer_base.py index b6e754a27005fc9ab695a1728b4b39184cc9be5c..be1354f3b456ec9391ffeb2d80b162785d00248f 100644 --- a/tasks/inference/text_generation/infer_base.py +++ b/modellink/tasks/inference/text_generation/infer_base.py @@ -26,10 +26,6 @@ logging.basicConfig(format="") logging.getLogger().setLevel(logging.INFO) - - - - def add_text_generate_args(parser): group = parser.add_argument_group(title='text generation') group.add_argument("--task", @@ -40,6 +36,8 @@ def add_text_generate_args(parser): group.add_argument("--temperature", type=float, default=0.7, help='Sampling temperature.') group.add_argument("--max-length", type=int, default=256, help='Total length of text.') group.add_argument("--max-new-tokens", type=int, default=128, help='Size of the output generated text.') + group.add_argument('--inference-prompt-type', type=str, default='llama', + help="choose the prompt type for inference") return parser diff --git a/tasks/inference/text_generation/module.py b/modellink/tasks/inference/text_generation/module.py similarity index 98% rename from tasks/inference/text_generation/module.py rename to modellink/tasks/inference/text_generation/module.py index c601fdfd46d4625d03c03d82d7dc182149a9675f..10755bce967044a92d0300fafee75aee510863a7 100644 --- a/tasks/inference/text_generation/module.py +++ b/modellink/tasks/inference/text_generation/module.py @@ -166,9 +166,9 @@ class MegatronModuleForCausalLM(MegatronModuleForCausalLMABC): def __init__(self, *args, **kwargs): super(MegatronModuleForCausalLM, self).__init__() from megatron import get_tokenizer - from tasks.inference.text_generation import greedy_search_or_sampling - from tasks.inference.text_generation import beam_search - from tasks.inference.text_generation.communication import broadcast_float_list + from .utils import greedy_search_or_sampling + from .generation import beam_search + from .communication import broadcast_float_list args = get_args() args.max_tokens_to_oom = args.max_tokens_to_oom if hasattr(args, "max_tokens_to_oom") else 4096 diff --git a/tasks/inference/text_generation/utils.py b/modellink/tasks/inference/text_generation/utils.py similarity index 99% rename from tasks/inference/text_generation/utils.py rename to modellink/tasks/inference/text_generation/utils.py index e716dbac44ae3e69a3b9b63408c6f743bb292a84..d3f780ee86b3b93f6f169bf1acfdd3173f477344 100644 --- a/tasks/inference/text_generation/utils.py +++ b/modellink/tasks/inference/text_generation/utils.py @@ -21,19 +21,19 @@ import torch import torch.nn.functional as F from torch.nn.parallel.distributed import DistributedDataParallel as torchDDP + from megatron import get_args from megatron import get_tokenizer from megatron.core import parallel_state - from megatron.utils import get_ltor_masks_and_position_ids, unwrap_model from megatron.core.pipeline_parallel.p2p_communication import recv_forward, send_forward - from megatron.core.distributed import DistributedDataParallel as LocalDDP from megatron.model import Float16Module from megatron.core.utils import get_model_config -from tasks.inference.text_generation.communication import broadcast_tensor -from tasks.finetune.lora.utils import is_enable_lora, get_lora_model_classes + +from .communication import broadcast_tensor +from ...finetune.lora.utils import is_enable_lora, get_lora_model_classes def get_batch(context_tokens): diff --git a/modellink/training.py b/modellink/training.py index 
4e393c72c5e85deb4bab10ea5202a4a90eb3e2b4..b4eaf311e42d344997e7455c31fafb771e7ff7e3 100644 --- a/modellink/training.py +++ b/modellink/training.py @@ -19,7 +19,7 @@ from functools import wraps from peft import LoraConfig, get_peft_model from megatron.arguments import core_transformer_config_from_args from megatron import get_args -from tasks.finetune.lora.utils import is_enable_lora +from .tasks.finetune.lora.utils import is_enable_lora def get_model_wrapper(fn): diff --git a/tasks/finetune/__init__.py b/tasks/finetune/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/tasks/finetune/lora/__init__.py b/tasks/finetune/lora/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/tasks/inference/__init__.py b/tasks/inference/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/tasks/inference/inference_llama.py b/tasks/inference/inference_llama.py deleted file mode 100644 index a051a8770344d3defb0677af299e85e9757431aa..0000000000000000000000000000000000000000 --- a/tasks/inference/inference_llama.py +++ /dev/null @@ -1,51 +0,0 @@ -# coding=utf-8 -# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ascendspeed import megatron_adaptor - -import modellink -from megatron import get_args -from megatron.model import GPTModel -from megatron.initialize import initialize_megatron -from megatron.arguments import core_transformer_config_from_args -from tasks.inference.text_generation.infer_base import task_factory, add_text_generate_args - - -def model_provider(pre_process=True, post_process=True): - """Build the model.""" - config = core_transformer_config_from_args(get_args()) - - init_model = GPTModel( - config, - parallel_output=False, - pre_process=pre_process, - post_process=post_process - ) - return init_model - - -if __name__ == "__main__": - initialize_megatron(extra_args_provider=add_text_generate_args, - args_defaults={'no_load_rng': True, - 'no_load_optim': True}) - - args = get_args() - - model = GPTModel.from_pretrained( - model_provider=model_provider, - pretrained_model_name_or_path=args.load - ) - - task_factory(args, model) diff --git a/tasks/inference/inference_mixtral.py b/tasks/inference/inference_mixtral.py deleted file mode 100644 index 7ca311b4d8e36dfa0bd6b24a107a855dc1c53c15..0000000000000000000000000000000000000000 --- a/tasks/inference/inference_mixtral.py +++ /dev/null @@ -1,54 +0,0 @@ -# coding=utf-8 -# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ascendspeed import megatron_adaptor - -import modellink -from megatron import get_args -from megatron.model import GPTModel -from megatron.initialize import initialize_megatron -from megatron.arguments import core_transformer_config_from_args -from tasks.inference.text_generation.infer_base import task_factory, add_text_generate_args - - -def model_provider(pre_process=True, post_process=True): - """Build the model.""" - config = core_transformer_config_from_args(get_args()) - init_model = GPTModel( - config, - parallel_output=False, - pre_process=pre_process, - post_process=post_process - ) - return init_model - - -if __name__ == "__main__": - initialize_megatron(extra_args_provider=add_text_generate_args, - args_defaults={'no_load_rng': True, - 'no_load_optim': True}) - - args = get_args() - - model = GPTModel.from_pretrained( - model_provider=model_provider, - pretrained_model_name_or_path=args.load - ) - - system_template = "" - dialog_template = "[INST] {instruction} [/INST] " - template = system_template + dialog_template - - task_factory(args, model, system_template=system_template, dialog_template=dialog_template) diff --git a/tests/pipeline/baichuan-13B/test_convert_weight_from_huggingface.py b/tests/pipeline/baichuan-13B/test_convert_weight_from_huggingface.py index d0dacfdf08bebb99d6e9100399fe04c050e0b29f..428789200c8bad35a3cd70020177ae30a6024179 100644 --- a/tests/pipeline/baichuan-13B/test_convert_weight_from_huggingface.py +++ b/tests/pipeline/baichuan-13B/test_convert_weight_from_huggingface.py @@ -31,7 +31,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. """ base_dir = Path(__file__).absolute().parent.parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(sys.argv[12], "iter_0000001") diff --git a/tests/pipeline/baichuan2-13B/test_convert_weight_from_huggingface.py b/tests/pipeline/baichuan2-13B/test_convert_weight_from_huggingface.py index 76a37964f20c327605c65b3ea81628eca7dd89dc..38e3f4009068493a3a852f212c265d45bc94ca44 100644 --- a/tests/pipeline/baichuan2-13B/test_convert_weight_from_huggingface.py +++ b/tests/pipeline/baichuan2-13B/test_convert_weight_from_huggingface.py @@ -31,7 +31,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. 
""" base_dir = Path(__file__).absolute().parent.parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(sys.argv[12], "iter_0000001") diff --git a/tests/pipeline/bloom-7B/test_convert_ckpt_from_huggingface.py b/tests/pipeline/bloom-7B/test_convert_ckpt_from_huggingface.py index 04b4105c5e4cc664aa6a9fdab344a2fbe6a30374..16bba71414ddf8c3b5de70d26654e45dd8ea12c4 100644 --- a/tests/pipeline/bloom-7B/test_convert_ckpt_from_huggingface.py +++ b/tests/pipeline/bloom-7B/test_convert_ckpt_from_huggingface.py @@ -23,7 +23,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. """ base_dir = Path(__file__).absolute().parent.parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(sys.argv[10], "iter_0000001") diff --git a/tests/pipeline/intern-7B/test_convert_ckpt_from_huggingface.py b/tests/pipeline/intern-7B/test_convert_ckpt_from_huggingface.py index 2cd8e4d5d5947a20ba52bb3ba6f09271e1e50444..0090794a1fda9566c2d7a5a125223104eab4ae8a 100644 --- a/tests/pipeline/intern-7B/test_convert_ckpt_from_huggingface.py +++ b/tests/pipeline/intern-7B/test_convert_ckpt_from_huggingface.py @@ -32,7 +32,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. """ base_dir = Path(__file__).absolute().parent.parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(self.config.convert_ckpt_param[11], "iter_0000001") diff --git a/tests/pipeline/llama2-7B/test_convert_ckpt_from_huggingface.py b/tests/pipeline/llama2-7B/test_convert_ckpt_from_huggingface.py index 0c456b3dc9f3165507c56afb121cf9fe5772d2e7..b6730f3cd043e118c6f04972ee3bcc5a67485c33 100644 --- a/tests/pipeline/llama2-7B/test_convert_ckpt_from_huggingface.py +++ b/tests/pipeline/llama2-7B/test_convert_ckpt_from_huggingface.py @@ -32,7 +32,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. """ base_dir = Path(__file__).absolute().parent.parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(self.config.convert_ckpt_param[11], "iter_0000001") diff --git a/tests/ut/test_convert_ckpt_from_huggingface.py b/tests/ut/test_convert_ckpt_from_huggingface.py index edc03bd8730478473592343c9ae9f5eb0a4f6d6e..58459d3f318cab1f3a0a781c7702b3c9c9c87929 100644 --- a/tests/ut/test_convert_ckpt_from_huggingface.py +++ b/tests/ut/test_convert_ckpt_from_huggingface.py @@ -41,7 +41,7 @@ class TestConvertCkptFromHuggingface(unittest.TestCase): to compose the unique transformer layer and all these layer stack to compose the entity of the model. 
""" base_dir = Path(__file__).absolute().parent.parent.parent - file_path = os.path.join(base_dir, "tools/checkpoint/util.py") + file_path = os.path.join(base_dir, "tools/checkpoint/convert_ckpt.py") arguments = sys.argv[1:] subprocess.run(["python", file_path] + arguments) output_dir = os.path.join(sys.argv[12], "iter_0000001") diff --git a/tools/checkpoint/util.py b/tools/checkpoint/convert_ckpt.py similarity index 100% rename from tools/checkpoint/util.py rename to tools/checkpoint/convert_ckpt.py diff --git a/tools/checkpoint/convert_util.sh b/tools/checkpoint/convert_util.sh deleted file mode 100644 index edcaaeb70873f48eb3ed917561970868bbe85eb7..0000000000000000000000000000000000000000 --- a/tools/checkpoint/convert_util.sh +++ /dev/null @@ -1,12 +0,0 @@ -CONVERT_ARGS=" - --no-checking \ - --model-type GPT \ - --loader loader_llama2_hf \ - --saver saver_megatron \ - --target-tensor-parallel-size 4 \ - --load-dir ./ckpt \ - --save-dir ./save_dir \ - --tokenizer-model None -" -source /usr/local/Ascend/ascend-toolkit/set_env.sh -python tools/checkpoint/util.py $CONVERT_ARGS \ No newline at end of file