[TOC]
本项目使用Verilog语言,设计实现一个简单的5级流水线CPU,兼容RV32I指令集。
语法上采用可综合语法完成设计,为简单可综合的5级流水线RISC-V处理器。
项目开发的初衷是实践微处理器结构与设计课程中讲述的基本知识,了解简单5级流水线微处理器设计中需要面临的基础问题,为之后的学习进步打好基础。
从体系结构角度,本项目实现了简单的5级流水线,使用阻塞方法处理所有数据相关、名相关和控制相关;从RISC-V设计角度,实现了M态下RV32I指令集。
具体实现指令如下:
add
指令功能 | reg[rd] = reg[rs1] + reg[rs2] |
---|---|
opcode | b0110011 |
funct3 | b000 |
funct7 | b0000000 |
sub
指令功能 | reg[rd] = reg[rs1] - reg[rs2] |
---|---|
opcode | b0110011 |
funct3 | b000 |
funct7 | b0100000 |
sll
指令功能 | reg[rd] = reg[rs1] << reg[rs2][4:0] |
---|---|
opcode | b0110011 |
funct3 | b001 |
funct7 | b0000000 |
slt
指令功能 | if reg[rs1] < reg[rs2] then set reg[rd] = 1 |
---|---|
opcode | b0110011 |
funct3 | b010 |
funct7 | b0000000 |
sltu
指令功能 | if unsigned reg[rs1] < unsigned reg[rs2] then set reg[rd] = 1 |
---|---|
opcode | b0110011 |
funct3 | b011 |
funct7 | b0000000 |
xor
指令功能 | reg[rd] = reg[rs1] ^ reg[rs2] |
---|---|
opcode | b0110011 |
funct3 | b100 |
funct7 | b0000000 |
srl
指令功能 | reg[rd] = reg[rs1] >> reg[rs2][4:0] |
---|---|
opcode | b0110011 |
funct3 | b101 |
funct7 | b0000000 |
sra
指令功能 | reg[rd] = reg[rs1] >>> reg[rs2][4:0](算术右移) |
---|---|
opcode | b0110011 |
funct3 | b101 |
funct7 | b0100000 |
or
指令功能 | reg[rd] = reg[rs1] \| reg[rs2] |
---|---|
opcode | b0110011 |
funct3 | b110 |
funct7 | b0000000 |
and
指令功能 | reg[rd] = reg[rs1] & reg[rs2] |
---|---|
opcode | b0110011 |
funct3 | b111 |
funct7 | b0000000 |
addi
指令功能 | reg[rd] = reg[rs1] + immediate |
---|---|
opcode | b0010011 |
funct3 | b000 |
slli
指令功能 | reg[rd] = reg[rs1] << shamt |
---|---|
opcode | b0010011 |
funct3 | b001 |
funct7 | b0000000 |
slti
指令功能 | if reg[rs1] < immediate then set reg[rd] = 1 |
---|---|
opcode | b0010011 |
funct3 | b010 |
sltiu
指令功能 | if unsigned reg[rs1] < unsigned immediate then set reg[rd] = 1 |
---|---|
opcode | b0010011 |
funct3 | b011 |
xori
指令功能 | reg[rd] = reg[rs1] ^ immediate |
---|---|
opcode | b0010011 |
funct3 | b100 |
srli
指令功能 | reg[rd] = reg[rs1] >> shamt |
---|---|
opcode | b0010011 |
funct3 | b101 |
funct7 | b0000000 |
srai
指令功能 | reg[rd] = reg[rs1] >>> shamt(算术右移) |
---|---|
opcode | b0010011 |
funct3 | b101 |
funct7 | b0100000 |
ori
指令功能 | reg[rd] = reg[rs1] \| immediate |
---|---|
opcode | b0010011 |
funct3 | b110 |
andi
指令功能 | reg[rd] = reg[rs1] & immediate |
---|---|
opcode | b0010011 |
funct3 | b111 |
lb
指令功能 | reg[rd] = mem[reg[rs1] + immediate] |
---|---|
opcode | b0000011 |
funct3 | b000 |
lh
指令功能 | reg[rd] = mem[reg[rs1] + immediate] |
---|---|
opcode | b0000011 |
funct3 | b001 |
lw
指令功能 | reg[rd] = mem[reg[rs1] + immediate] |
---|---|
opcode | b0000011 |
funct3 | b010 |
lbu
指令功能 | reg[rd] = unsigned mem[reg[rs1] + immediate] |
---|---|
opcode | b0000011 |
funct3 | b100 |
lhu
指令功能 | reg[rd] = unsigned mem[reg[rs1] + immediate] |
---|---|
opcode | b0000011 |
funct3 | b101 |
jalr
指令功能 | jump to reg[rs1] + immediate and set reg[rd] = pc+4 |
---|---|
opcode | b1100111 |
funct3 | b000 |
lui
指令功能 | reg[rd] = immediate << 12 |
---|---|
opcode | 0110111 |
auipc
指令功能 | reg[rd] = pc + (immediate << 12) |
---|---|
opcode | 0010111 |
jal
指令功能 | jump to pc + immediate and set reg[rd] = pc+4 |
---|---|
opcode | 1101111 |
sb
指令功能 | mem[reg[rs1] + offset] = reg[rs2] |
---|---|
opcode | 0100011 |
funct3(width) | 000 |
sh
指令功能 | mem[reg[rs1] + offset] = reg[rs2] |
---|---|
opcode | 0100011 |
funct3(width) | 001 |
sw
指令功能 | mem[reg[rs1] + offset] = reg[rs2] |
---|---|
opcode | 0100011 |
funct3(width) | 010 |
beq
指令功能 | Branch to pc+immediate, if reg[rs1] == reg[rs2] |
---|---|
opcode | 1100011 |
funct3 | 000 |
bne
指令功能 | Branch to pc+immediate, if reg[rs1] != reg[rs2] |
---|---|
opcode | 1100011 |
funct3 | 001 |
blt
指令功能 | Branch to pc+immediate, if reg[rs1] < reg[rs2] |
---|---|
opcode | 1100011 |
funct3 | 100 |
bge
指令功能 | Branch to pc+immediate, if reg[rs1] >= reg[rs2] |
---|---|
opcode | 1100011 |
funct3 | 101 |
bltu
指令功能 | Branch to pc+immediate, if unsigned reg[rs1] < unsigned reg[rs2] |
---|---|
opcode | 1100011 |
funct3 | 110 |
bgeu
指令功能 | Branch to pc+immediate, if unsigned reg[rs1] >= unsigned reg[rs2] |
---|---|
opcode | 1100011 |
funct3 | 111 |
未实现系统寄存器,仅实现32个整数寄存器
CPU主体结构示意图如下:
流水线示意图如下:
本项目中将ROM视为指令存储位置,RAM为Data存储位置,5级流水线的第一级Fetch Instruction 通过ROM和CPU通信完成。
本项目的模块层级结构如图所示,设顶层为Top。
信号名 | 端口方向 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|---|
clk | input | wire | 1 | 时钟信号 |
内部信号
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
ram_addr | wire | 10 | 用于CPU向RAM传递读写地址 |
ram_read_data | wire | 32 | 用于接收RAM中读出的数据,并传给CPU |
ram_write_data | wire | 32 | 用于接收CPU中传出的数据,并传给RAM写 |
ram_write_enable | wire | 1 | RAM写使能信号,为1时写RAM |
instruction_addr | wire | 10 | 指令地址,用于从ROM中读出指令 |
instruction | wire | 32 | 指令,用于接收ROM中读出的指令并传给CPU执行 |
具体实现
`include "CPU.v"
`include "RAM.v"
`include "ROM.v"
`timescale 1ns / 100ps
// Top: structural wrapper that connects the CPU core to its instruction ROM
// and data RAM. The only external port is the clock; every other signal is an
// internal net between the three instances.
module Top (
    input wire clk
);

    // ---- CPU <-> instruction ROM ----
    wire [9:0]  instruction_addr;   // ROM address driven by the CPU
    wire [31:0] instruction;        // word returned by the ROM

    // ---- CPU <-> data RAM ----
    wire [9:0]  ram_addr;           // RAM address driven by the CPU
    wire [31:0] ram_write_data;     // CPU -> RAM store data
    wire [31:0] ram_read_data;      // RAM -> CPU load data
    wire        ram_write_enable;   // RAM write strobe driven by the CPU

    // Instruction memory.
    ROM inst_ROM (
        .address (instruction_addr),
        .data    (instruction)
    );

    // Data memory; shares the CPU clock.
    RAM inst_RAM (
        .clk          (clk),
        .address      (ram_addr),
        .write_enable (ram_write_enable),
        .data_in      (ram_write_data),
        .data_out     (ram_read_data)
    );

    // Processor core.
    CPU inst_CPU (
        .clk              (clk),
        .instruction      (instruction),
        .instruction_addr (instruction_addr),
        .ram_read_data    (ram_read_data),
        .ram_write_data   (ram_write_data),
        .ram_write_enable (ram_write_enable),
        .ram_addr         (ram_addr)
    );

endmodule
信号名 | 端口方向 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|---|
address | input | wire | 10 | 给出访问地址 |
data | output | wire | 32 | 读出访问地址对应的数据 |
内部信号
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
memory | reg | 32 x 1024 | 定义存储单元,有1024*4B = 4KB大小 |
具体实现
// ROM: 1024-entry, 32-bit-wide instruction memory with a combinational read.
//
// Ports:
//   address - word index of the instruction to read.
//   data    - contents of memory[address].
//
// Every word is first filled with a NOP (addi x0, x0, 0) so that fetches past
// the end of the example program return a harmless instruction instead of X.
module ROM (
    input  [9:0]  address,
    output [31:0] data
);

    reg [31:0] memory [1023:0];
    integer    i;

    assign data = memory[address];

    initial begin
        // Default contents: NOP everywhere.
        for (i = 0; i < 1024; i = i + 1) begin
            memory[i] = 32'h00000013;
        end

        // Example program:
        memory[0] = 32'h00000013; // nop            (addi x0, x0, 0)
        // start:
        memory[1] = 32'h00100093; // addi x1,  x0, 1
        memory[2] = 32'h00100313; // addi x6,  x0, 1
        memory[3] = 32'h00400613; // addi x12, x0, 4
        memory[4] = 32'h00602023; // sw   x6, 0(x0)    rs2 (x6) depends on the instruction in WB
        // loop:
        memory[5] = 32'h00002303; // lw   x6, 0(x0)
        memory[6] = 32'h00130313; // addi x6, x6, 1    load stall, then rs1 (x6) depends on WB
        memory[7] = 32'h00602023; // sw   x6, 0(x0)    rs2 (x6) depends on the previous instruction
        memory[8] = 32'hFEC34AE3; // blt  x6, x12, -12 rs1 (x6) depends on the instruction in WB
        // finish:
        memory[9] = 32'h00000013; // nop            (addi x0, x0, 0)
    end

endmodule
信号名 | 端口方向 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|---|
address | input | wire | 10 | 访存地址 |
data_in | input | wire | 32 | 给出需要写入的数据 |
write_enable | input | wire | 1 | 写使能信号,为1时写入数据 |
clk | input | wire | 1 | 时钟信号,RAM需要在时钟信号下运行,需要刷新 |
data_out | output | wire | 32 | 读数据时,给出访存地址中存储的数据 |
内部信号
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
memory | reg | 32 x 1024 | 存储单元模拟,共4KB |
具体实现
// RAM: 1024-entry, 32-bit-wide data memory.
//
// Ports:
//   address      - entry index used for both the read and the write.
//   data_in      - value stored at memory[address] when write_enable is high.
//   write_enable - write strobe, sampled on the rising edge of clk.
//   clk          - write clock.
//   data_out     - memory[address], read combinationally (no clock involved).
module RAM (
    input  wire [9:0]  address,
    input  wire [31:0] data_in,
    input  wire        write_enable,
    input  wire        clk,
    output wire [31:0] data_out
);

    // Storage array; contents are not initialised here.
    reg [31:0] memory [1023:0];

    // Synchronous write port.
    always @(posedge clk)
        if (write_enable)
            memory[address] <= data_in;

    // Asynchronous read port.
    assign data_out = memory[address];

endmodule
信号名 | 端口方向 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|---|
clk | input | wire | 1 | 时钟 |
ram_read_data | input | wire | 32 | 从RAM中读取的数据 |
instruction | input | wire | 32 | 从ROM中读取的指令 |
ram_addr | output | wire | 10 | 访问RAM的地址 |
ram_write_data | output | reg | 32 | 用于写RAM的数据 |
ram_write_enable | output | wire | 1 | 用于RAM的写使能信号 |
instruction_addr | output | wire | 10 | 指令地址,用于从ROM中取指令,为pc右移两位得到 |
参数列表
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
op_R_type | parameter | 7 | R-type指令的opcode |
op_I_type_load | parameter | 7 | I-type指令的opcode |
op_I_type_other | parameter | 7 | I-type指令的opcode |
op_I_type_jump | parameter | 7 | I-type指令的opcode |
op_S_type | parameter | 7 | S-type指令的opcode |
op_B_type | parameter | 7 | B-type指令的opcode |
op_U_type_load | parameter | 7 | U-type指令的opcode |
op_U_type_jump | parameter | 7 | U-type指令的opcode |
op_U_type_auipc | parameter | 7 | U-type指令的opcode |
type_register | parameter | ||
type_load | parameter | ||
type_store | parameter | ||
type_immediate | parameter | ||
type_upperImmediate | parameter | ||
type_branch | parameter | ||
decode | parameter | ||
execute | parameter | ||
memory | parameter | ||
writeback | parameter |
各流水段和段寄存器使用的信号列表
Fetch Instruction
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
instruction | reg | 32 | 为ROM中读取的指令 |
pc | reg | 10 | 指令计数器,用于计算instruction_addr |
IF/ID
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
instruction_deconde_2 | reg | 32 | 储存指令 |
pc_decode_2 | reg | 10 | 存储指令对应的pc |
reg_source_1_pipeline[decode] | reg | 5 | 存储rs1,为第一个操作数寄存器号 |
reg_source_2_pipeline[decode] | reg | 5 | 存储rs2,为第二个操作数寄存器号 |
reg_destinate_pipeline[decode] | reg | 5 | 存储rd,为目的寄存器号 |
type_pipeline[decode] | reg | 3 | 存储指令类型 |
Decode Instruction
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
ID/EXE
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
instruction_execute_3 | reg | 32 | 储存指令 |
pc_execute_3 | reg | 10 | 存储指令对应的pc |
reg_source_1_pipeline[execute] | reg | 5 | 存储rs1,为第一个操作数寄存器号 |
reg_source_2_pipeline[execute] | reg | 5 | 存储rs2,为第二个操作数寄存器号 |
reg_destinate_pipeline[execute] | reg | 5 | 存储rd,为目的寄存器号 |
type_pipeline[execute] | reg | 3 | 存储指令类型 |
Execute
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
opcode | wire | 7 | 处于Execute段(ID/EXE流水段寄存器)中的指令的opcode |
funct3 | wire | 3 | 处于Execute段(ID/EXE流水段寄存器)中的指令的funct3 |
reg_source_1 | wire | 5 | 处于Execute段(ID/EXE流水段寄存器)中的指令的rs1 |
reg_source_2 | wire | 5 | 处于Execute段(ID/EXE流水段寄存器)中的指令的rs2 |
funct7 | wire | 7 | 处于Execute段(ID/EXE流水段寄存器)中的指令的funct7 |
R_type | wire | 1 | 判断处于Execute段指令是否为R-type,若是,set to 1 |
I_type_load | wire | 1 | 判断处于Execute段指令是否为I-type 指令中的load类型指令,若是,set to 1 |
I_type_other | wire | 1 | 判断处于Execute段指令是否为I-type 指令中的其他类型指令,若是,set to 1 |
I_type_jump | wire | 1 | 判断处于Execute段指令是否为I-type 指令中的jump类型指令,若是,set to 1 |
I_type | wire | 1 | 判断处于Execute段指令是否为I-type,若是,set to 1 |
S_type | wire | 1 | 判断处于Execute段指令是否为S-type,若是,set to 1 |
B_type | wire | 1 | 判断处于Execute段指令是否为B-type,若是,set to 1 |
U_type_load | wire | 1 | 判断处于Execute段指令是否为U-type 指令中的load类型指令,若是,set to 1 |
U_type_jump | wire | 1 | 判断处于Execute段指令是否为U-type 指令中的jump类型指令,若是,set to 1 |
U_type_auipc | wire | 1 | 判断处于Execute段指令是否为U-type 指令中的auipc类型指令,若是,set to 1 |
U_type | wire | 1 | 判断处于Execute段指令是否为U-type,若是,set to 1 |
R_add ... B_bgeu | wire | 1 | 共37个信号,用于判断当前指令的具体操作,根据对opcode、funct3和funct7的译码得到 |
reg_source_1_data | wire signed | 32 | 用于ALU计算的操作数,为第一个操作数 |
reg_source_2_data | wire signed | 32 | 用于ALU计算的操作数,为第二个操作数 |
reg_source_1_data_unsigned | wire | 32 | 用于ALU计算的操作数,为第一个操作数的无符号数 |
reg_source_2_data_unsigned | wire | 32 | 用于ALU计算的操作数,为第二个操作数的无符号数 |
pc_alu_sel | wire | 1 | 用于指明是否改变pc的值,若为1,则置pc为alu计算结果 |
immediate_value | wire | 32 | 存储immediate value |
immediate_selection | wire | 3 | 用于存储8位编码器输出,作为ImmediateExtractor的输入,指出当前指令的格式,从而明确提取immediate value的方法 |
immediate_selection_inputs | wire | 8 | 用于8位编码器输入,转换为immediate_selection |
alu_operation_encoder_inputs | wire | 16 | 16位编码器输入,转换为alu_operation |
alu_operation | wire | 4 | 16位编码器输出,作为ALU输入,指出当前操作类型,明确需要进行的操作 |
alu_source_1_selection_inputs | wire | 4 | 4位编码器输入,转换为alu_source_1_selection |
alu_source_1_selection | wire | 2 | 4位编码器输出,用于指出需要选择的操作数来源 |
alu_source_2_selection_inputs | wire | 4 | 4位编码器输入,转换为alu_source_2_selection |
alu_source_2_selection | wire | 2 | 4位编码器输出,用于指出需要选择的操作数来源 |
EXE/MEM
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
instruction_memory_4 | reg | 32 | 储存指令 |
pc_memory_4 | reg | 10 | 存储指令对应的pc |
reg_source_1_pipeline[memory] | reg | 5 | 存储rs1,为第一个操作数寄存器号 |
reg_source_2_pipeline[memory] | reg | 5 | 存储rs2,为第二个操作数寄存器号 |
reg_destinate_pipeline[memory] | reg | 5 | 存储rd,为目的寄存器号 |
type_pipeline[memory] | reg | 3 | 存储指令类型 |
Memory
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
reg_destinate | wire | 5 | 用于给出写回Register File的目的寄存器号 |
ram_write_enable | wire | 1 | RAM写使能信号,Store指令且处于Memory段时置1 |
ram_addr | wire | 10 | 访问RAM的地址,为alu计算结果 |
MEM/WB
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
instruction_writeback_5 | reg | 32 | 储存指令 |
reg_source_1_pipeline[writeback] | reg | 5 | 存储rs1,为第一个操作数寄存器号 |
reg_source_2_pipeline[writeback] | reg | 5 | 存储rs2,为第二个操作数寄存器号 |
reg_destinate_pipeline[writeback] | reg | 5 | 存储rd,为目的寄存器号 |
type_pipeline[writeback] | reg | 3 | 存储指令类型 |
Write Back
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
opcode_writeback_5 | wire | 7 | WB段指令的opcode |
writeback_R_type … writeback_U_type | wire | 1 | 根据opcode_writeback_5置值,指明处于WB段的指令的类型 |
reg_write_enable | wire | 1 | 若为R-type, I-type, U-type指令,需要写寄存器 |
reg_writeback_selection_inputs | wire | 4 | 4位编码器输入,转换为reg_writeback_selection |
reg_writeback_selection | wire | 2 | 4位编码器输出结果,指出指令类型,用于判断写回哪些数据 |
阻塞信号
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
data_dependence_hazard_r1 | wire | 1 | 若执行段中rs1同访存段的rd相同则置1 |
data_dependence_hazard_r2 | wire | 1 | 若执行段中rs2同访存段的rd相同则置1 |
data_dependence_hazard_r1_writeback | wire | 1 | 若执行段中rs1同写回段的rd相同则置1 |
data_dependence_hazard_r2_writeback | wire | 1 | 若执行段中rs2同写回段的rd相同则置1 |
load_stall | wire | 1 | 若ID段指令同EXE段指令存在load-->use关系,则置1 |
control_hazard_stall | wire | 1 | 若分支指令结果未出则阻塞 |
具体实现
`include "ALU.v"
`include "RegisterFile.v"
`include "ImmediateExtractor.v"
`include "Encoders.v"
module CPU (
input clk,
input [31:0] ram_read_data,
input [31:0] instruction,
output [9:0] ram_addr,
output reg [31:0] ram_write_data,
output ram_write_enable,
output [9:0] instruction_addr
);
////////////////////////////////////////////////////////////////////////////////////////////////
////// Define Parameter here ////////
////////////////////////////////////////////////////////////////////////////////////////////////
parameter [6:0] op_R_type = 7'h33;
parameter [6:0] op_I_type_load = 7'h03;
parameter [6:0] op_I_type_other = 7'h13;
parameter [6:0] op_I_type_jump = 7'h6F;
parameter [6:0] op_S_type = 7'h23;
parameter [6:0] op_B_type = 7'h63;
parameter [6:0] op_U_type_load = 7'h37;
parameter [6:0] op_U_type_jump = 7'h67;
parameter [6:0] op_U_type_auipc = 7'h17;
parameter type_register = 0;
parameter type_load = 1;
parameter type_store = 2;
parameter type_immediate = 3;
parameter type_upperImmediate = 4;
parameter type_branch = 5;
parameter decode = 0;
parameter execute = 1;
parameter memory = 2;
parameter writeback = 3;
////////////////////////////////////////////////////////////////////////////////////////////////
////// Define Segment registers here ////////
////////////////////////////////////////////////////////////////////////////////////////////////
reg [9:0] pc = 0;
assign instruction_addr = pc >> 2;
reg [9:0] pc_decode_2 = 0;
reg [31:0] instruction_deconde_2 = 0;
reg [9:0] pc_execute_3 = 0;
reg [31:0] instruction_execute_3 = 0;
reg [9:0] pc_memory_4 = 0;
reg [31:0] instruction_memory_4 = 0;
reg [31:0] alu_out_memory_4;
reg [31:0] instruction_writeback_5 = 0;
reg [31:0] reg_write_data_writeback_5 = 0;
reg [31:0] ram_read_data_writeback_5 = 0;
////////////////////////////////////////////////////////////////////////////////////////////////
////// Define Hazard Signal here ////////
////////////////////////////////////////////////////////////////////////////////////////////////
wire data_dependence_hazard_r1;
wire data_dependence_hazard_r2;
wire data_dependence_hazard_r1_writeback;
wire data_dependence_hazard_r2_writeback;
wire load_stall;
wire control_hazard_stall;
wire [1:0] reg_writeback_selection;
wire signed [31:0] reg_source_1_data_execute_3;
wire signed [31:0] reg_source_2_data_execute_3;
////////////////////////////////////////////////////////////////////////////////////////////////
////// Define ALU Signals here ////////
////////////////////////////////////////////////////////////////////////////////////////////////
wire [6:0] opcode = instruction_execute_3[6:0];
wire [4:0] reg_destinate = instruction_writeback_5[11:7];
wire [2:0] funct3 = instruction_execute_3[14:12];
wire [4:0] reg_source_1 = instruction_execute_3[19:15];
wire [4:0] reg_source_2 = instruction_execute_3[24:20];
wire [6:0] funct7 = instruction_execute_3[31:25];
wire R_type = opcode == op_R_type;
wire I_type_load = opcode == op_I_type_load;
wire I_type_other = opcode == op_I_type_other;
wire I_type_jump = opcode == op_I_type_jump;
wire I_type = I_type_jump || I_type_other || I_type_load;
wire S_type = opcode == op_S_type;
wire B_type = opcode == op_B_type;
wire U_type_load = opcode == op_U_type_load;
wire U_type_jump = opcode == op_U_type_jump;
wire U_type_auipc = opcode == op_U_type_auipc;
wire U_type = U_type_load || U_type_jump || op_U_type_auipc;
///////////////////////////////////////////////////////////////////////////
//// Ten R-Type Instruction of RV32I ////
///////////////////////////////////////////////////////////////////////////
wire R_add = R_type && funct3 == 3'h0 && funct7 == 7'h00;
wire R_sub = R_type && funct3 == 3'h0 && funct7 == 7'h20;
wire R_sll = R_type && funct3 == 3'h1 && funct7 == 7'h00;
wire R_slt = R_type && funct3 == 3'h2 && funct7 == 7'h00;
wire R_sltu = R_type && funct3 == 3'h3 && funct7 == 7'h00;
wire R_xor = R_type && funct3 == 3'h4 && funct7 == 7'h00;
wire R_srl = R_type && funct3 == 3'h5 && funct7 == 7'h00;
wire R_sra = R_type && funct3 == 3'h5 && funct7 == 7'h20;
wire R_or = R_type && funct3 == 3'h6 && funct7 == 7'h00;
wire R_and = R_type && funct3 == 3'h7 && funct7 == 7'h00;
///////////////////////////////////////////////////////////////////////////
//// Thirteen I-Type Instruction of RV32I ////
///////////////////////////////////////////////////////////////////////////
wire I_addi = I_type_other && funct3 == 3'h0;
wire I_slli = I_type_other && funct3 == 3'h1 && funct7 == 7'h00;
wire I_slti = I_type_other && funct3 == 3'h2;
wire I_sltiu = I_type_other && funct3 == 3'h3;
wire I_xori = I_type_other && funct3 == 3'h4;
wire I_srli = I_type_other && funct3 == 3'h5 && funct7 == 7'h00;
wire I_srai = I_type_other && funct3 == 3'h5 && funct7 == 7'h10;
wire I_ori = I_type_other && funct3 == 3'h6;
wire I_andi = I_type_other && funct3 == 3'h7;
wire I_lb = instruction_memory_4[6:0] == op_I_type_load && instruction_memory_4[14:12] == 3'h0;
wire I_lh = instruction_memory_4[6:0] == op_I_type_load && instruction_memory_4[14:12] == 3'h1;
wire I_lw = instruction_memory_4[6:0] == op_I_type_load && instruction_memory_4[14:12] == 3'h2;
wire I_lbu = instruction_memory_4[6:0] == op_I_type_load && instruction_memory_4[14:12] == 3'h4;
wire I_lhu = instruction_memory_4[6:0] == op_I_type_load && instruction_memory_4[14:12] == 3'h5;
wire I_jalr = I_type_jump;
///////////////////////////////////////////////////////////////////////////
//// Three U-Type Instruction of RV32I ////
///////////////////////////////////////////////////////////////////////////
wire U_lui = U_type_load;
wire U_auipc = U_type_auipc;
wire U_jal = U_type_jump;
///////////////////////////////////////////////////////////////////////////
//// Three S-Type Instruction of RV32I ////
///////////////////////////////////////////////////////////////////////////
wire S_lb = instruction_memory_4[6:0] == op_S_type && instruction_memory_4[14:12] == 3'h0;
wire S_lh = instruction_memory_4[6:0] == op_S_type && instruction_memory_4[14:12] == 3'h1;
wire S_lw = instruction_memory_4[6:0] == op_S_type && instruction_memory_4[14:12] == 3'h2;
///////////////////////////////////////////////////////////////////////////
//// Six B-Type Instruction of RV32I ////
///////////////////////////////////////////////////////////////////////////
wire B_beq = B_type && funct3 == 0;
wire B_bne = B_type && funct3 == 1;
wire B_blt = B_type && funct3 == 4;
wire B_bge = B_type && funct3 == 5;
wire B_bltu = B_type && funct3 == 6;
wire B_bgeu = B_type && funct3 == 7;
wire signed [31:0] reg_source_1_data = data_dependence_hazard_r1 ? alu_out_memory_4:
data_dependence_hazard_r1_writeback?
(reg_writeback_selection == 3? ram_read_data_writeback_5: reg_write_data_writeback_5)
: reg_source_1_data_execute_3;
wire signed [31:0] reg_source_2_data = data_dependence_hazard_r2 ? alu_out_memory_4:
data_dependence_hazard_r2_writeback?
(reg_writeback_selection == 3? ram_read_data_writeback_5: reg_write_data_writeback_5)
: reg_source_2_data_execute_3;
// wire [31:0] r1 = [:];
wire [31:0] reg_source_1_data_unsigned = reg_source_1_data;
wire [31:0] reg_source_2_data_unsigned = reg_source_2_data;
// if processing branch type inst, and the branch result is true, set pc_alu_sel = 1
wire pc_alu_sel = (B_beq && reg_source_1_data == reg_source_2_data)
|| (B_bne && reg_source_1_data != reg_source_2_data)
|| (B_blt && reg_source_1_data < reg_source_2_data)
|| (B_bge && reg_source_1_data >= reg_source_2_data)
|| (B_bltu && reg_source_1_data_unsigned < reg_source_2_data_unsigned)
|| (B_bgeu && reg_source_1_data_unsigned >= reg_source_2_data_unsigned)
|| I_jalr
|| U_jal;
assign ram_write_enable = instruction_memory_4[6:0] == op_S_type;
assign ram_addr = alu_out_memory_4[9:0];
reg [4:0] reg_source_1_pipeline[3:0]; // source register 1 's register of current stage, is a part of segment register
reg [4:0] reg_source_2_pipeline[3:0]; // source register 2 's register of current stage, is a part of segment register
reg [4:0] reg_destinate_pipeline[3:0]; // destinate register 's register of current stage, is a part of segment register
reg [2:0] type_pipeline[3:0]; // instruction types of current stage. [R-Type=0, Load=1, Store=2, Immediate or UpperImmediate=3, Branch=4]
assign data_dependence_hazard_r1 = reg_source_1_pipeline[execute] != 0
&& type_pipeline[execute] != type_upperImmediate
&& reg_source_1_pipeline[execute] == reg_destinate_pipeline[memory];
assign data_dependence_hazard_r2 = reg_source_2_pipeline[execute] != 0
&& type_pipeline[execute] != type_upperImmediate
&& type_pipeline[execute] != type_immediate
&& reg_source_2_pipeline[execute] == reg_destinate_pipeline[memory];
assign data_dependence_hazard_r1_writeback = reg_source_1_pipeline[execute] != 0
&& type_pipeline[execute] != type_upperImmediate
&& reg_source_1_pipeline[execute] == reg_destinate_pipeline[writeback];
assign data_dependence_hazard_r2_writeback = reg_source_2_pipeline[execute] != 0
&& type_pipeline[execute] != type_upperImmediate
&& type_pipeline[execute] != type_immediate
&& reg_source_2_pipeline[execute] == reg_destinate_pipeline[writeback];
assign load_stall = type_pipeline[execute] == type_load
&& (
(
type_pipeline[decode] != type_upperImmediate
&& type_pipeline[decode] != type_immediate
&& (
(reg_source_1_pipeline[decode] != 0 && reg_source_1_pipeline[decode] == reg_destinate_pipeline[execute])
|| (reg_source_2_pipeline[decode] != 0 && reg_source_2_pipeline[decode] == reg_destinate_pipeline[execute])
)
)
|| (
type_pipeline[decode] == type_immediate
&& reg_source_1_pipeline[decode] != 0
&& reg_source_1_pipeline[decode] == reg_destinate_pipeline[execute]
)
);
assign control_hazard_stall = instruction_deconde_2[6:0] == op_B_type || instruction_execute_3[6:0] == op_B_type;
// get the immediate value
// ImmediateExtractor
wire [31:0] immediate_value;
wire [2:0] immediate_selection;
wire [7:0] immediate_selection_inputs;
assign immediate_selection_inputs[0] = 0;
assign immediate_selection_inputs[1] = I_type;
assign immediate_selection_inputs[2] = U_type_load || U_type_auipc;
assign immediate_selection_inputs[3] = S_type;
assign immediate_selection_inputs[4] = B_type;
assign immediate_selection_inputs[5] = U_type_jump;
assign immediate_selection_inputs[6] = 0;
assign immediate_selection_inputs[7] = 0;
Encoder_8 inst_Encoder_8 (
.in(immediate_selection_inputs),
.out(immediate_selection)
);
ImmediateExtractor inst_ImmediateExtractor (
.instruction(instruction_execute_3),
.selection(immediate_selection),
.value(immediate_value)
);
// select the operation of alu
wire [15:0] alu_operation_encoder_inputs;
wire [3:0] alu_operation;
assign alu_operation_encoder_inputs[0] = R_add || I_addi;
assign alu_operation_encoder_inputs[1] = R_sub;
assign alu_operation_encoder_inputs[2] = R_and || I_andi;
assign alu_operation_encoder_inputs[3] = R_or || I_ori;
assign alu_operation_encoder_inputs[4] = R_xor || I_xori;
assign alu_operation_encoder_inputs[5] = R_sll || I_slli;
assign alu_operation_encoder_inputs[6] = R_srl || I_srli;
assign alu_operation_encoder_inputs[7] = R_sra || I_srai;
assign alu_operation_encoder_inputs[8] = R_slt || I_slti;
assign alu_operation_encoder_inputs[9] = R_sltu || I_sltiu;
assign alu_operation_encoder_inputs[10] = 0;
assign alu_operation_encoder_inputs[11] = 0;
assign alu_operation_encoder_inputs[12] = 0;
assign alu_operation_encoder_inputs[13] = 0;
assign alu_operation_encoder_inputs[14] = 0;
assign alu_operation_encoder_inputs[15] = 0;
Encoder_16 inst_Encoder_16 (
.in(alu_operation_encoder_inputs),
.out(alu_operation)
);
// select the input of alu
wire [3:0] alu_source_1_selection_inputs;
wire [3:0] alu_source_2_selection_inputs;
wire [1:0] alu_source_1_selection;
wire [1:0] alu_source_2_selection;
assign alu_source_1_selection_inputs[0] = 1;
assign alu_source_1_selection_inputs[1] = B_type || U_type_jump || U_type_auipc || I_type_jump;
assign alu_source_1_selection_inputs[2] = U_type_load;
assign alu_source_1_selection_inputs[3] = 0;
assign alu_source_2_selection_inputs[0] = 1;
assign alu_source_2_selection_inputs[1] = S_type || I_type || B_type || U_type;
assign alu_source_2_selection_inputs[2] = 0;
assign alu_source_2_selection_inputs[3] = 0;
Encoder_4 inst_Encoder_4_1 (.in(alu_source_1_selection_inputs), .out(alu_source_1_selection));
Encoder_4 inst_Encoder_4_2 (.in(alu_source_2_selection_inputs), .out(alu_source_2_selection));
// implement alu
reg [31:0] alu_source_1;
reg [31:0] alu_source_2;
wire [31:0] alu_out;
wire is_equal;
ALU inst_ALU(
.reg_data_1 (alu_source_1),
.reg_data_2 (alu_source_2),
.opcode (alu_operation),
.alu_result (alu_out),
.isEqual (is_equal)
);
always @ (*) begin
case (alu_source_1_selection)
0: alu_source_1 = reg_source_1_data;
1: alu_source_1 = pc_execute_3;
default: alu_source_1 = 'b0;
endcase
case (alu_source_2_selection)
0: alu_source_2 = reg_source_2_data;
1: alu_source_2 = immediate_value;
default: alu_source_2 = 'b0;
endcase
end
// Register File
wire [6:0] opcode_writeback_5 = instruction_writeback_5[6:0];
wire writeback_R_type = opcode_writeback_5 == op_R_type;
wire writeback_I_type_load = opcode_writeback_5 == op_I_type_load;
wire writeback_I_type_other = opcode_writeback_5 == op_I_type_other;
wire writeback_I_type_jump = opcode_writeback_5 == op_I_type_jump;
wire writeback_I_type = writeback_I_type_load || writeback_I_type_other || writeback_I_type_jump;
wire writeback_U_type_load = opcode_writeback_5 == op_U_type_load;
wire writeback_U_type_jump = opcode_writeback_5 == op_U_type_jump;
wire writeback_U_type_auipc = opcode_writeback_5 == op_U_type_auipc;
wire writeback_U_type = writeback_U_type_jump || writeback_U_type_load || writeback_U_type_auipc;
wire reg_write_enable = writeback_R_type || writeback_I_type || writeback_U_type;
wire [3:0] reg_writeback_selection_inputs;
assign reg_writeback_selection_inputs[0] = 0;
assign reg_writeback_selection_inputs[1] = writeback_R_type || writeback_U_type_load || writeback_I_type_other;
assign reg_writeback_selection_inputs[2] = writeback_U_type_jump || writeback_I_type_jump;
assign reg_writeback_selection_inputs[3] = writeback_I_type_load;
Encoder_4 writeback_selection_encoder(
.in(reg_writeback_selection_inputs),
.out(reg_writeback_selection)
);
// wire signed [31:0] reg_source_1_data_execute_3;
// wire signed [31:0] reg_source_2_data_execute_3;
wire [31:0] reg_writeback_data = reg_writeback_selection == 3? ram_read_data_writeback_5: reg_write_data_writeback_5;
RegisterFile inst_RegisterFile(
.reg_source_1 (reg_source_1),
.reg_source_2 (reg_source_2),
.reg_destinate (reg_destinate),
.reg_destinate_data (reg_writeback_data),
.write_enable (reg_write_enable),
.reg_source_1_data (reg_source_1_data_execute_3),
.reg_source_2_data (reg_source_2_data_execute_3)
);
// pipelining
// Stage 1: Fetch instruction, the instruction come from rom
always @ (posedge clk) begin
if (pc_alu_sel == 1) begin
pc <= alu_out[9:0];
end else begin
if (load_stall == 1 || control_hazard_stall == 1) begin
pc <= pc;
end else begin
pc <= pc + 4;
end
end
if (control_hazard_stall == 1) begin
reg_source_1_pipeline[decode] <= 0;
reg_source_2_pipeline[decode] <= 0;
reg_destinate_pipeline[decode] <= 0;
type_pipeline[decode] <= type_immediate;
end else begin
reg_source_1_pipeline[decode] <= instruction[19:15];
reg_source_2_pipeline[decode] <= instruction[24:20];
reg_destinate_pipeline[decode] <= instruction[11:7];
if (instruction[6:0] == op_R_type) begin
type_pipeline[decode] <= type_register;
end else if (instruction[6:0] == op_I_type_load) begin
type_pipeline[decode] <= type_load;
end else if (instruction[6:0] == op_S_type) begin
type_pipeline[decode] <= type_store;
end else if (instruction[6:0] == op_I_type_other || instruction[6:0] == op_I_type_jump) begin
type_pipeline[decode] <= type_immediate;
end else if (instruction[6:0] == op_B_type) begin
type_pipeline[decode] <= type_branch;
end
end
end
// Stage 2: Decode and get the source reg
always @ (posedge clk) begin
if (load_stall) begin
instruction_deconde_2 <= instruction_deconde_2;
pc_decode_2 <= pc_decode_2;
end else if (control_hazard_stall) begin
instruction_deconde_2 <= 32'h00000013;
pc_decode_2 <= pc_decode_2;
end else begin
instruction_deconde_2 <= instruction;
pc_decode_2 <= pc;
end
if (instruction_deconde_2[6:0] == op_R_type) begin
type_pipeline[execute] <= type_register;
end else if (instruction_deconde_2[6:0] == op_I_type_load) begin
type_pipeline[execute] <= type_load;
end else if (instruction_deconde_2[6:0] == op_S_type) begin
type_pipeline[execute] <= type_store;
end else if (instruction_deconde_2[6:0] == op_I_type_other || instruction_deconde_2[6:0] == op_I_type_jump) begin
type_pipeline[execute] <= type_immediate;
end else if (instruction_deconde_2[6:0] == op_B_type[6:0]) begin
type_pipeline[execute] <= type_branch;
end
reg_source_1_pipeline[execute] <= instruction_deconde_2[19:15];
reg_source_2_pipeline[execute] <= instruction_deconde_2[24:20];
reg_destinate_pipeline[execute] <= instruction_deconde_2[11:7];
if (load_stall) begin
reg_source_1_pipeline[execute] <= 0;
reg_source_2_pipeline[execute] <= 0;
reg_destinate_pipeline[execute] <= 0;
type_pipeline[execute] <= type_immediate;
end
end
// Stage 3: Execute
always @ (posedge clk) begin
if (load_stall) begin
instruction_execute_3 <= 32'h00000013;
pc_execute_3 <= pc_execute_3;
end else begin
pc_execute_3 <= pc_decode_2;
instruction_execute_3 <= instruction_deconde_2;
end
if (instruction_execute_3[6:0] == op_R_type) begin
type_pipeline[memory] <= type_register;
end else if (instruction_execute_3[6:0] == op_I_type_load) begin
type_pipeline[memory] <= type_load;
end else if (instruction_execute_3[6:0] == op_S_type) begin
type_pipeline[memory] <= type_store;
end else if (instruction_execute_3[6:0] == op_I_type_other || instruction_execute_3[6:0] == op_I_type_jump) begin
type_pipeline[memory] <= type_immediate;
end else if (instruction_execute_3[6:0] == op_B_type[6:0]) begin
type_pipeline[memory] <= type_branch;
end
reg_source_1_pipeline[memory] <= instruction_execute_3[19:15];
reg_source_2_pipeline[memory] <= instruction_execute_3[24:20];
reg_destinate_pipeline[memory] <= instruction_execute_3[11:7];
end
// Stage 4: Memory
always @ (posedge clk) begin
instruction_memory_4 <= instruction_execute_3;
pc_memory_4 <= pc_execute_3;
alu_out_memory_4 <= alu_out;
ram_write_data <= reg_source_2_data;
if (instruction_memory_4[6:0] == op_R_type) begin
type_pipeline[writeback] <= type_register;
end else if (instruction_memory_4[6:0] == op_I_type_load) begin
type_pipeline[writeback] <= type_load;
end else if (instruction_memory_4[6:0] == op_S_type) begin
type_pipeline[writeback] <= type_store;
end else if (instruction_memory_4[6:0] == op_I_type_other || instruction_memory_4[6:0] == op_I_type_jump) begin
type_pipeline[writeback] <= type_immediate;
end else if (instruction_memory_4[6:0] == op_B_type[6:0]) begin
type_pipeline[writeback] <= type_branch;
end
reg_source_1_pipeline[writeback] <= instruction_memory_4[19:15];
reg_source_2_pipeline[writeback] <= instruction_memory_4[24:20];
reg_destinate_pipeline[writeback] <= instruction_memory_4[11:7];
end
// Stage 5: writeback
// Latches the write-back candidates on each rising clock edge.
always @ (posedge clk) begin
    // Register write data: selection 1 takes the ALU result, selection 2
    // takes pc_memory_4 + 4. Any other selection value leaves
    // reg_write_data_writeback_5 unchanged.
    if (reg_writeback_selection == 1) begin
        reg_write_data_writeback_5 <= alu_out_memory_4;
    end else if (reg_writeback_selection == 2) begin
        reg_write_data_writeback_5 <= pc_memory_4 + 4;
    end

    // Data read from RAM and the instruction itself always advance.
    ram_read_data_writeback_5 <= ram_read_data;
    instruction_writeback_5 <= instruction_memory_4;
end
endmodule
信号名 | 端口方向 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|---|
reg_data_1 | input | wire | 32 | 操作数1 |
reg_data_2 | input | wire | 32 | 操作数2 |
opcode | input | wire | 4 | 编码后的操作码,用于指示是实现什么操作 |
alu_result | output | reg | 32 | 用于存储计算后的结果 |
isEqual | output | wire | 1 | 若为1,则两个操作数相等 |
内部信号
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
reg_data_1_signed | wire signed | 32 | 存储符号扩展后的操作数1,未实现 |
reg_data_2_signed | wire signed | 32 | 存储符号扩展后的操作数2,未实现 |
具体实现
// Combinational ALU.
//
// Ports:
//   reg_data_1, reg_data_2 : 32-bit operands.
//   opcode                 : operation select (see the case statement below).
//   alu_result             : 32-bit result; 0 for unlisted opcodes.
//   isEqual                : 1 when both operands are bit-for-bit equal.
module ALU (
    input [31:0] reg_data_1,
    input [31:0] reg_data_2,
    input [3:0] opcode,
    output reg [31:0] alu_result,
    output isEqual
);
    // Signed views of the operands. These were previously declared but never
    // driven, which made every expression using them evaluate to X.
    wire signed [31:0] reg_data_1_signed = reg_data_1;
    wire signed [31:0] reg_data_2_signed = reg_data_2;

    // RV32I shifts use only the low five bits of the second operand.
    wire [4:0] shift_amount = reg_data_2[4:0];

    // Equality does not depend on signedness, so compare the raw operands.
    assign isEqual = (reg_data_1 == reg_data_2);

    always @ (*) begin
        case (opcode)
            4'd0: alu_result = reg_data_1 + reg_data_2;                          // add
            4'd1: alu_result = reg_data_1 - reg_data_2;                          // subtract
            4'd2: alu_result = reg_data_1 & reg_data_2;                          // bitwise and
            4'd3: alu_result = reg_data_1 | reg_data_2;                          // bitwise or
            4'd4: alu_result = reg_data_1 ^ reg_data_2;                          // bitwise xor
            4'd5: alu_result = reg_data_1 << shift_amount;                       // shift left logical
            4'd6: alu_result = reg_data_1 >> shift_amount;                       // shift right logical
            4'd7: alu_result = reg_data_1_signed >>> shift_amount;               // shift right arithmetic
            4'd8: alu_result = (reg_data_1_signed < reg_data_2_signed) ? 1 : 0;  // set if less than, signed
            4'd9: alu_result = (reg_data_1 < reg_data_2) ? 1 : 0;                // set if less than, unsigned
            default: alu_result = 32'b0;
        endcase
    end
endmodule
信号名 | 端口方向 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|---|
in | input | wire | 4 or 8 or 16 | 需要编码的信号 |
out | output | reg | 2 or 3 or 4 | 编码后的结果 |
内部信号
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
具体实现
// 4-input priority encoder: out is the index of the most significant set
// bit of in. Both 4'b0001 and 4'b0000 produce 0.
module Encoder_4 (
    input [3:0] in,
    output reg [1:0] out
);
    always @ (*) begin
        // casez with '?' wildcards is used instead of casex so that an X on
        // the input is not silently treated as a match.
        casez (in)
            4'b1???: out = 2'd3;
            4'b01??: out = 2'd2;
            4'b001?: out = 2'd1;
            default: out = 2'd0; // 4'b0001, 4'b0000 and anything unmatched
        endcase
    end
endmodule
// 8-input priority encoder: out is the index of the most significant set
// bit of in. An all-zero input produces 0, matching Encoder_4.
module Encoder_8 (
    input [7:0] in,
    output reg [2:0] out
);
    always @ (*) begin
        // casez with '?' wildcards is used instead of casex so that an X on
        // the input is not silently treated as a match.
        casez (in)
            8'b1???????: out = 3'd7;
            8'b01??????: out = 3'd6;
            8'b001?????: out = 3'd5;
            8'b0001????: out = 3'd4;
            8'b00001???: out = 3'd3;
            8'b000001??: out = 3'd2;
            8'b0000001?: out = 3'd1;
            8'b00000001: out = 3'd0;
            // The all-zero input previously had no arm, which infers a latch
            // in this combinational block.
            default: out = 3'd0;
        endcase
    end
endmodule
// 16-input priority encoder: out is the index of the most significant set
// bit of in. An all-zero input produces 0, matching Encoder_4.
module Encoder_16 (
    input [15:0] in,
    output reg [3:0] out
);
    always @ (*) begin
        // casez with '?' wildcards is used instead of casex so that an X on
        // the input is not silently treated as a match.
        casez (in)
            16'b1???????????????: out = 4'd15;
            16'b01??????????????: out = 4'd14;
            16'b001?????????????: out = 4'd13;
            16'b0001????????????: out = 4'd12;
            16'b00001???????????: out = 4'd11;
            16'b000001??????????: out = 4'd10;
            16'b0000001?????????: out = 4'd9;
            16'b00000001????????: out = 4'd8;
            16'b000000001???????: out = 4'd7;
            16'b0000000001??????: out = 4'd6;
            16'b00000000001?????: out = 4'd5;
            16'b000000000001????: out = 4'd4;
            16'b0000000000001???: out = 4'd3;
            16'b00000000000001??: out = 4'd2;
            16'b000000000000001?: out = 4'd1;
            16'b0000000000000001: out = 4'd0;
            // The all-zero input previously had no arm, which infers a latch
            // in this combinational block.
            default: out = 4'd0;
        endcase
    end
endmodule
信号名 | 端口方向 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|---|
instruction | input | wire | 32 | 将指令传入,用于提取立即数 |
selection | input | wire | 3 | 用于选择解压方式,由指令类型决定 |
value | output | reg signed | 32 | 输出立即数 |
内部信号
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
imm_11_0 | wire | 12 | instruction[31:20],I-type immediate value |
imm_31_12 | wire | 20 | instruction[31:12],U-type immediate value,低12位填0 |
imm_4_0 | wire | 5 | instruction[11:7],Part of S-type immediate value |
imm_11_5 | wire | 7 | instruction[31:25],Part of S-type immediate value |
imm_11_B | wire | 1 | instruction[7],Part of B-type immediate value |
imm_4_1 | wire | 4 | instruction[11:8],Part of B-type immediate value |
imm_10_5 | wire | 6 | instruction[30:25],Part of B-type immediate value |
imm_12 | wire | 1 | instruction[31],Part of B-type immediate value |
imm_19_12 | wire | 8 | instruction[19:12],Part of J-type immediate value |
imm_11_J | wire | 1 | instruction[20],Part of J-type immediate value |
imm_10_1 | wire | 10 | instruction[30:21],Part of J-type immediate value |
imm_20 | wire | 1 | instruction[31],Part of J-type immediate value |
imm_I | wire signed | 32 | Store I-type immediate value |
imm_U | wire signed | 32 | Store U-type immediate value |
imm_B | wire signed | 32 | Store B-type immediate value |
imm_S | wire signed | 32 | Store S-type immediate value |
imm_J | wire signed | 32 | Store J-type immediate value |
具体实现
// Extracts and sign-extends the immediate encoded in a 32-bit instruction.
//
// Ports:
//   instruction : the 32-bit instruction word.
//   selection   : immediate format: 1 = I, 2 = U, 3 = S, 4 = B, 5 = J;
//                 any other value yields 0.
//   value       : the selected 32-bit signed immediate (combinational).
//
// The former "initial value <= 0" block was removed: value is driven purely
// combinationally by the always @(*) block below, so an initial assignment
// only adds a second procedural driver that races with it at time 0.
module ImmediateExtractor (
    input [31:0] instruction,
    input [2:0] selection,
    output reg signed [31:0] value
);
    // Raw immediate fields, named after the immediate bits they carry.
    wire [11:0] imm_11_0  = instruction[31:20]; // I-type imm[11:0]
    wire [19:0] imm_31_12 = instruction[31:12]; // U-type imm[31:12]
    wire [4:0]  imm_4_0   = instruction[11:7];  // S-type imm[4:0]
    wire [6:0]  imm_11_5  = instruction[31:25]; // S-type imm[11:5]
    wire        imm_11_B  = instruction[7];     // B-type imm[11]
    wire [3:0]  imm_4_1   = instruction[11:8];  // B-type imm[4:1]
    wire [5:0]  imm_10_5  = instruction[30:25]; // B-type imm[10:5]
    wire        imm_12    = instruction[31];    // B-type imm[12]
    wire [7:0]  imm_19_12 = instruction[19:12]; // J-type imm[19:12]
    wire        imm_11_J  = instruction[20];    // J-type imm[11]
    wire [9:0]  imm_10_1  = instruction[30:21]; // J-type imm[10:1]
    wire        imm_20    = instruction[31];    // J-type imm[20]

    // Assembled immediates. Each concatenation is exactly 32 bits wide, so
    // nothing depends on implicit truncation of an over-wide sign extension.
    wire signed [31:0] imm_I = {{20{imm_11_0[11]}}, imm_11_0};
    wire signed [31:0] imm_U = {imm_31_12, 12'h000};
    wire signed [31:0] imm_B = {{20{imm_12}}, imm_11_B, imm_10_5, imm_4_1, 1'b0};
    wire signed [31:0] imm_S = {{20{imm_11_5[6]}}, imm_11_5, imm_4_0};
    wire signed [31:0] imm_J = {{12{imm_20}}, imm_19_12, imm_11_J, imm_10_1, 1'b0};

    always @ (*) begin
        case (selection)
            3'd1: value = imm_I;
            3'd2: value = imm_U;
            3'd3: value = imm_S;
            3'd4: value = imm_B;
            3'd5: value = imm_J;
            default: value = 0;
        endcase
    end
endmodule
信号名 | 端口方向 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|---|
reg_source_1 | input | wire | 5 | rs1的寄存器号 |
reg_source_2 | input | wire | 5 | rs2的寄存器号 |
reg_destinate | input | wire | 5 | rd的寄存器号 |
reg_destinate_data | input | wire | 32 | 用于存储到目的寄存器中的数据 |
write_enable | input | wire | 1 | 写使能信号,若为1,则写数据到目的寄存器 |
reg_source_1_data | output | wire | 32 | reg[rs1] |
reg_source_2_data | output | wire | 32 | reg[rs2] |
内部信号
信号名 | 信号类型 | 信号宽度 | 信号功能 |
---|---|---|---|
registers | reg | 32 x 32 | 32个整数寄存器,均为32位 |
具体实现
// 32 x 32-bit integer register file with two asynchronous read ports and one
// write port.
//
// Ports:
//   reg_source_1, reg_source_2 : register numbers to read.
//   reg_destinate              : register number to write.
//   reg_destinate_data         : data to write.
//   write_enable               : when 1, reg_destinate_data is written to
//                                registers[reg_destinate]; writes to register
//                                0 are ignored, so it keeps its initial 0.
//   reg_source_1_data/_2_data  : current contents of the selected registers.
module RegisterFile (
    input [4:0] reg_source_1,
    input [4:0] reg_source_2,
    input [4:0] reg_destinate,
    input [31:0] reg_destinate_data,
    input write_enable,
    output [31:0] reg_source_1_data,
    output [31:0] reg_source_2_data
);
    reg [31:0] registers[31:0];
    integer i;

    // Clear every register at start-up. The loop previously ran to i == 32
    // and wrote one element past the end of the 32-entry array.
    initial begin
        for (i = 0; i < 32; i = i + 1) begin
            registers[i] = 0;
        end
    end

    assign reg_source_1_data = registers[reg_source_1];
    assign reg_source_2_data = registers[reg_source_2];

    // The module has no clock port, so the write is level-sensitive: while
    // write_enable is 1 the addressed register follows reg_destinate_data.
    always @ (*) begin
        if (write_enable == 1 && reg_destinate != 0) begin
            registers[reg_destinate] = reg_destinate_data;
        end
    end
endmodule
数据冒险包括RAW,WAW,WAR三种,本项目开发的CPU为顺序流出顺序结束的流水线CPU,不存在WAW和WAR,只需要解决RAW,即写后读。
写后读包括三种情况
本项目使用旁路和阻塞解决上述三种情况,通过专门设定检测信号来控制数据的传输,检测代码如下:
// RAW hazard on rs1 against the [memory] slot: asserted when the rs1 number
// in the [execute] slot is non-zero, the [execute] slot is not an
// upper-immediate instruction, and rs1 equals the destination register
// number recorded in the [memory] slot.
// NOTE(review): only register numbers are compared; this expression does not
// check whether the instruction in the [memory] slot actually writes a
// register - confirm reg_destinate_pipeline holds 0 for such instructions.
assign data_dependence_hazard_r1 = reg_source_1_pipeline[execute] != 0
&& type_pipeline[execute] != type_upperImmediate
&& reg_source_1_pipeline[execute] == reg_destinate_pipeline[memory];
// Same check for rs2. Immediate-type instructions are additionally excluded
// here, since for them bits [24:20] are not compared as a source register.
assign data_dependence_hazard_r2 = reg_source_2_pipeline[execute] != 0
&& type_pipeline[execute] != type_upperImmediate
&& type_pipeline[execute] != type_immediate
&& reg_source_2_pipeline[execute] == reg_destinate_pipeline[memory];
// RAW hazard on rs1 against the [writeback] slot (same conditions as above,
// compared with the destination recorded in the [writeback] slot).
assign data_dependence_hazard_r1_writeback = reg_source_1_pipeline[execute] != 0
&& type_pipeline[execute] != type_upperImmediate
&& reg_source_1_pipeline[execute] == reg_destinate_pipeline[writeback];
// RAW hazard on rs2 against the [writeback] slot.
assign data_dependence_hazard_r2_writeback = reg_source_2_pipeline[execute] != 0
&& type_pipeline[execute] != type_upperImmediate
&& type_pipeline[execute] != type_immediate
&& reg_source_2_pipeline[execute] == reg_destinate_pipeline[writeback];
// Load-use stall: asserted when the [execute] slot holds a load and the
// instruction in the [decode] slot reads the load's destination register.
//   - first alternative: the [decode] slot is neither upper-immediate nor
//     immediate type, and its non-zero rs1 or rs2 matches the load's rd;
//   - second alternative: the [decode] slot is immediate type, so only its
//     non-zero rs1 is compared with the load's rd.
assign load_stall = type_pipeline[execute] == type_load
&& (
(
type_pipeline[decode] != type_upperImmediate
&& type_pipeline[decode] != type_immediate
&& (
(reg_source_1_pipeline[decode] != 0 && reg_source_1_pipeline[decode] == reg_destinate_pipeline[execute])
|| (reg_source_2_pipeline[decode] != 0 && reg_source_2_pipeline[decode] == reg_destinate_pipeline[execute])
)
)
|| (
type_pipeline[decode] == type_immediate
&& reg_source_1_pipeline[decode] != 0
&& reg_source_1_pipeline[decode] == reg_destinate_pipeline[execute]
)
);
检测信号说明
旁路实现说明
旁路的实现方法,是通过上述的检测信号判断ALU源操作数的来源,实现如下
// Forwarding mux for source operand 1, in priority order:
//   1. hazard against the [memory] slot    -> alu_out_memory_4
//   2. hazard against the [writeback] slot -> ram_read_data_writeback_5 when
//      reg_writeback_selection is 3, otherwise reg_write_data_writeback_5
//   3. no hazard                           -> reg_source_1_data_execute_3
// NOTE(review): reg_writeback_selection is sampled at its current value here;
// confirm it refers to the instruction whose result is being forwarded.
wire signed [31:0] reg_source_1_data = data_dependence_hazard_r1 ? alu_out_memory_4:
data_dependence_hazard_r1_writeback?
(reg_writeback_selection == 3? ram_read_data_writeback_5: reg_write_data_writeback_5)
: reg_source_1_data_execute_3;
// Forwarding mux for source operand 2; same priority scheme as operand 1,
// falling back to reg_source_2_data_execute_3 when there is no hazard.
wire signed [31:0] reg_source_2_data = data_dependence_hazard_r2 ? alu_out_memory_4:
data_dependence_hazard_r2_writeback?
(reg_writeback_selection == 3? ram_read_data_writeback_5: reg_write_data_writeback_5)
: reg_source_2_data_execute_3;
以对源操作数1的数据来源判断为例
阻塞实现说明
本项目中解决控制相关的方式是阻塞,当B-type指令进入ID段时,使流水线停止,等待分支结果出现后再继续取指执行。检测分支的信号如下:
// Control-hazard stall: asserted while the opcode field of the instruction in
// the decode-stage register or in the execute-stage register equals op_B_type.
assign control_hazard_stall = instruction_deconde_2[6:0] == op_B_type || instruction_execute_3[6:0] == op_B_type;
nop                     # no operation
start:
addi x1, x0, 1          # x1 = 1
addi x6, x0, 1          # x6 = 1
addi x12, x0, 4         # x12 = 4 (loop limit)
sw x6, 0(x0)            # mem[0] = x6
loop:
lw x6, 0(x0)            # x6 = mem[0]
addi x6, x6, 1          # x6 = x6 + 1 (uses the value just loaded)
sw x6, 0(x0)            # mem[0] = x6
blt x6, x12, -12        # if x6 < x12 (signed) branch back 12 bytes, i.e. to the lw at loop
finish:
nop                     # no operation
4和6行存在RAW的第二种情况
9和10行存在RAW的第三种情况
10和11行存在RAW的第一种情况
blt为分支指令,存在控制冒险
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。