wenda
/
example.config.yml

logging: False#日志
logging_path: "sqlite:///../record.db?check_same_thread=False"
# 日志地址 可以连接远程mysql服务器：'mysql+pymysql://root:123456@localhost:3306/db?charset=utf8'
port: 17860
#webui 默认启动端口号"
library:
   strategy: "calc:2 rtst:5 agents:0"
   #库参数，每组参数间用空格分隔，冒号前为知识库类型，后为抽取数量。

   #知识库类型:
   #serper      基于serper API的Google搜索，请在环境变量设置SERPER_API_KEY（https://serper.dev）
   #bing        cn.bing搜索，仅国内可用，目前处于服务降级状态
   #sogowx      sogo微信公众号搜索，可配合相应auto实现全文内容分析
   #fess        fess搜索引擎
   #rtst        支持实时生成的sentence_transformers
   #remote      调用远程闻达知识库，用于集群化部署
   #kg          知识图谱,暂未启用
   #特殊库：
   #mix         根据参数进行多知识库融合
   #agents      提供网络资源代理，没有知识库查找功能，所以数量为0
   #            （目前stable-diffusion的auto脚本需要使用其中功能，同时需开启stable-diffusion的api功能）

   count: 5
   #最大抽取数量（所有知识库总和）

   step: 2
   #知识库默认上下文步长
librarys:
   bing:
      count:
         5
         #最大抽取数量
   bingsite:
      count: 5
      #最大抽取数量
      site: "www.12371.cn"
      #搜索网站
   fess:
      count: 1
      #最大抽取数量
      fess_host: "127.0.0.1:8080"
      #fess搜索引擎的部署地址
   remote:
      host:
         "http://127.0.0.1:17860/api/find"
         #远程知识库地址地址
   rtst:
      count: 3
      #最大抽取数量
      #   backend: Annoy
      size: 20
      #分块大小"
      overlap: 0
      #分块重叠长度
      # model_path: "http://127.0.0.1:8000/v1" #在线embedding
      model_path: "model/m3e-base"
      #向量模型存储路径
      device: cuda
      #embedding运行设备
   qdrant:
      path: txt
      #知识库文本路径
      model_path: "model/text2vec-large-chinese"
      #向量模型存储路径
      qdrant_host: "http://localhost:6333"
      #qdrant服务地址"
      device: cpu
      #qdrant运行设备
      collection: qa_collection
      #qdrant集合名称
   kg:
      count: 5
      #最大抽取数量
      knowledge_path: ""
      #知识库的文件夹目录名称，若留空则为txt
      graph_host: ""
      #图数据库部署地址
      model_path: ""
      #信息抽取模型所在路径"
llm_type: rwkv
#llm模型类型:glm6b、rwkv、llama、replitcode等，详见相关文件
llm_models:
   rwkv:
      path: "model/rwkv-x060-3b-world-v2-28%trained-20231208-ctx4k.pth" #rwkv模型位置"
      # path: "model/RWKV-code-4-World-7B-20230820-ctx32k.pth" #rwkv模型位置"
      strategy: "cuda fp16"
      #   path: "model/rwkv_ggml_q8.bin"           #rwkv模型位置"
      #   strategy: "Q8_0"       #rwkvcpp:运行方式，设置strategy诸如"Q8_0->16"即可开启，代表运行Q8_0模型在16个cpu核心上
      #记得去https://github.com/saharNooby/rwkv.cpp/releases下最新版本文件替换librwkv.so或rwkv.dll
      #rwkv模型参数"

      historymode: state
      #rwkv历史记录实现方式：state、string。注意，对于torch实现，本参数已弃用，因为已经实现自动切换。
      state_source_device: cpu
      #torch实现下，会保存每个会话的state。每个占用2M显存，本参数控制将其复制到内存以节约显存。
      #置为cuda:0，代表state来自cuda:0，且需要将其复制到内存以节约显存。
      #置为cuda:1，同理。
      #置为cpu，代表不需要复制，如果使用cpu计算，或多卡计算需这么设置。
      presence_penalty: 0.2
      count_penalty: 0.2
   glm6b:
      path: "model\\chatglm3-6b"
      #glm模型位置"
      strategy: "cuda fp16"
      #cuda fp16	 所有glm模型 要直接跑在gpu上都可以使用这个参数
      #cuda fp16i8	 fp16原生模型 要自行量化为int8跑在gpu上可以使用这个参数
      #cuda fp16i4	 fp16原生模型 要自行量化为int4跑在gpu上可以使用这个参数
      #cuda:0 fp16 *14 -> cuda:1	fp16 多卡流水线并行，使用方法参考RWKV的strategy介绍。总层数28
      # lora: "model/chatglm_fitness_lora"
      #glm-lora模型位置
   internlm:
      path: model\internlm-chat-7b-8k
      #模型位置"
   qwen:
      path: model\Qwen-7B-Chat
      #模型位置"
   baichuan:
      #模型名字中有gptq会进入gptq模式，不加载lora，且会加载basename中的模型
      #百川
      # path: "model\\Baichuan-13B-Chat"
      # user: "<human>"
      # answer: "<bot>"
      # interface: ":"
      #vicuna
      path: model\Baichuan2-13B-Chat-4bits
      user: "Human"
      answer: "Assistant"
      interface: ": "
      # basename: gptq_model-4bit-128g
      #lora模型位置"
      # lora: "model/64rank"
   generic_transformers:
      #模型名字中有gptq会进入gptq模式，不加载lora，且会加载basename中的模型
      #百川
      # path: "model\\Baichuan-13B-Chat"
      # user: "<human>"
      # answer: "<bot>"
      # interface: ":"
      #vicuna
      # path: model\Llama-2-13B-chat-GPTQ
      # user: "Human"
      # answer: "Assistant"
      # interface: ": "
      # basename: gptq_model-4bit-128g
      # stopping_text: "\nHuman:"
      #SUS-Chat-34B-AWQ
      path: model\SUS-Chat-34B-AWQ
      user: "### Human: "
      answer: "\n### Assistant:\n"
      interface: ""
      stopping_text: "### Human: "
      #lora模型位置"
      # lora: "model/64rank"
   aquila:
      path: "AquilaChat-7B"
      #模型位置"
      strategy: "cuda fp16"
   llama:
      path: "model/stable-vicuna-13B.ggml.q4_2.bin"
      #llama模型位置
      #   strategy: "Q8_0"       #cpp:运行方式，设置strategy诸如"Q8_0"即可开启
      strategy: "cuda fp16i8"
   moss:
      path: "model/moss-moon-003-sft-plugin-int4"
      #模型位置
      strategy: ""
      #模型参数 暂时不用"
   openai:
      #   api_host: "https://gpt.lucent.blog/v1" #网友的反代
      #   api_host: "https://api.openai.com/v1" #官方地址
      api_host: "http://127.0.0.1:8000/v1" #rwkv runner
      # api_host: "http://127.0.0.1:3000/v1" #rwkv ai00

   replitcode:
      path: "model/replit-code-v1-3b"
      #replitcode模型位置
      #说明：目前模型参数和chat模型差异较大，写死了，不能通过wenda界面配置，需要调整自行到llm_replitcode.py 文件中调整，或放开wenda界面参数
      #y = model.generate(x, max_length=100, do_sample=true, top_p=0.95, top_k=4, temperature=0.2, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
      #模型地址：https://huggingface.co/replit/replit-code-v1-3b ，约10g
      #作用代码补全：问：def fibonacci(n):
      #答：def fibonacci(n):
      #if n == 0:
      #return 0
      #elif n == 1:
      #return 1
      #else:
      #return fibonacci(n-1) + fibonacci(n-2)
      #print(fibonacci(10))
      strategy: "cuda fp16"
      #因我的3070只有8g内存，所以是先把模理加载到内存进行精度转换，至少要32g系统内存， fp16 占用不到6g显存,fp32 超过8g未测试"