7 Star 2 Fork 324

openKylin / cybersectookits

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
LeNet5.py 11.18 KB
一键复制 编辑 原始数据 按行查看 历史
yujie 提交于 2023-05-11 12:15 . 加密流量检测的深度学习模型
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
from tensorflow.keras import models
import numpy as np
import time
import os
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report
from tensorflow.python.keras.backend import dropout
import itertools
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import regularizers
import os
# --- Runtime configuration --------------------------------------------------
# Request quieter TensorFlow native logging and hide every CUDA device
# (value "-1"), so the script runs on CPU.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    'CUDA_VISIBLE_DEVICES': "-1",
})

# Create a TF1-compat session whose GPU memory is allowed to grow on demand.
# The session object itself is not kept.
tf_config = tf.compat.v1.ConfigProto()
tf_config.gpu_options.allow_growth = True
tf.compat.v1.Session(config=tf_config)

# Directory that every output file (figures, CSVs, timing) is written to.
path = './'

# Matplotlib font dictionaries: `font` is passed as `fontdict` for the axis
# labels of the confusion matrix; `font_text` is the same style at size 8.
font = dict(family='serif', color='darkred', weight='normal', size=14)
font_text = dict(font, size=8)

# Class names used as confusion-matrix tick labels and report row names.
matraix_list = ['non-VPN', 'VPN']
def LeNet5(train_x,test_x,train_y,test_y):
    """Train the two-class CNN, plot its learning curves and score it.

    The network is two strided convolution + max-pooling stages, a
    64-unit dense layer and a 2-way softmax output, trained with Adam and
    categorical cross-entropy for 4 epochs with batch size 4.

    Args:
        train_x: Training images; the first layer declares
            ``input_shape=(1500, 1500, 1)``.
        test_x: Images used both as validation data during ``fit`` and
            for the final prediction pass.
        train_y: Integer class ids for ``train_x`` (one-hot encoded to
            2 classes before fitting).
        test_y: Integer class ids for ``test_x``.

    Side effects:
        Writes ``total_CNN_accuracy_Epochs.eps/.png``,
        ``total_CNN_loss_Epochs.png`` and ``CNNv1_time.txt`` under
        ``path``, shows the figures, then calls ``score`` which writes
        the ``CNNv1_*`` result files.
    """
    model = models.Sequential()
    # With padding='SAME' and stride 5 the 1500x1500 input becomes a
    # 300x300 feature map.
    model.add(layers.Conv2D(10, (10, 10), strides=(5, 5), padding='SAME', activation='relu',
                            input_shape=(1500, 1500, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(20, (10, 10), strides=(5, 5), padding='SAME', activation='relu'))
    model.add(layers.Dropout(0.25))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())  # flatten the feature maps into one vector
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(2, activation='softmax'))
    model.summary()  # print the network structure

    # Adam is used with the framework's default hyper-parameters.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # categorical_crossentropy expects one-hot targets.
    train_yy = to_categorical(train_y, 2)
    test_yy = to_categorical(test_y, 2)

    t1 = time.time()
    history = model.fit(train_x,
                        train_yy,
                        epochs=4,
                        batch_size=4,
                        validation_data=(test_x, test_yy)
                        )
    time_train = time.time() - t1

    # Training vs. validation accuracy per epoch.
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    epochs = range(1, len(acc) + 1)
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'r', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(os.path.join(path, 'total_CNN_accuracy_Epochs.eps'), dpi=300, format='eps')
    plt.savefig(os.path.join(path, 'total_CNN_accuracy_Epochs.png'))
    plt.show()

    # Training vs. validation loss per epoch (PNG only).
    plt.clf()
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'r', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(os.path.join(path, 'total_CNN_loss_Epochs.png'))
    plt.show()

    # Time the prediction pass; argmax turns softmax rows into class ids.
    t1 = time.time()
    out_pre0 = model.predict(test_x)
    out_pre = np.argmax(out_pre0, axis=1)
    time_test = time.time() - t1

    # The context manager closes the file even if a write fails.
    with open(os.path.join(path, 'CNNv1_time.txt'), 'w') as timing_file:
        timing_file.write('train_time:' + str(round(time_train, 2)))
        timing_file.write('\ntest_time:' + str(round(time_test, 2)))

    score(test_y, out_pre, history, 'CNNv1')
def score(test_y,out_pre,history,classification):
    """Compute evaluation metrics for a classifier and write them to disk.

    Args:
        test_y: True integer class labels of the test samples.
        out_pre: Predicted integer class labels, same length as ``test_y``.
        history: Object whose ``.history`` mapping (metric name -> list of
            per-epoch values) is turned into a DataFrame.
        classification: Name of the classifier (str); used as the
            filename prefix of every output file.

    Side effects:
        Reads ``reindex.csv`` from ``path`` and writes, under ``path``:
        ``<name>_score.csv`` (per-epoch history plus a ``score`` row),
        ``<name>_total_matrix.eps/.png`` (normalized confusion matrix),
        ``<name>_predict.csv`` (label, prediction, original index) and
        ``<name>_report.csv`` (the classification report as a table).
    """
    df_reindex = pd.read_csv(os.path.join(path, 'reindex.csv'), encoding='gb2312')
    df_data = pd.DataFrame(history.history)

    precision, recall, f1_score, _ = metrics.precision_recall_fscore_support(
        test_y, out_pre, average='weighted')
    print('Precision:{:.4f}'.format(precision),'Recall:{:.4f}'.format(recall),'f1_score:{:.4f}'.format(f1_score))

    # Store the three weighted scores in a 'score' row under the first
    # three history columns.  Selecting those columns by label (instead of
    # the positional slice 0:3 through .loc) works on current pandas too.
    df_data.loc['score'] = pd.Series([precision, recall, f1_score],
                                     index=df_data.columns[:3])
    df_data.to_csv(os.path.join(path, classification + '_score.csv'))

    # Row-normalized confusion matrix figure.
    my_confusion_matrix = metrics.confusion_matrix(test_y, out_pre)
    plt.figure(figsize=(10, 8), dpi=120)
    plot_confusion_matrix(my_confusion_matrix, classes=matraix_list, normalize=True)
    plt.savefig(os.path.join(path, classification + '_total_matrix.eps'), dpi=300, format='eps')
    plt.savefig(os.path.join(path, classification + '_total_matrix.png'))
    plt.show()

    # Per-sample table: true label, prediction and the sample's original
    # row index as recorded in reindex.csv.
    pre_df = pd.DataFrame(test_y, columns=['label'])
    pre_df['pre'] = out_pre
    pre_df['previousindex'] = df_reindex['previousindex']
    pre_df.to_csv(os.path.join(path, classification + '_predict.csv'), index=False)

    # Per-class report: print it, then save it as a table.
    cr = classification_report(test_y, out_pre, target_names=matraix_list, digits=4)
    print(cr)
    df = pd.DataFrame(_report_rows(cr))
    df.to_csv(os.path.join(path, classification + '_report.csv'), index=False)


def _is_number(token):
    """Return True when *token* parses as a float."""
    try:
        float(token)
    except ValueError:
        return False
    return True


def _report_rows(report_text):
    """Split a plain-text classification report into equal-width rows.

    The first non-blank line is the header; it is prefixed with an empty
    cell for the label column.  In every other line the trailing numeric
    tokens are the values and everything before them is the row label, so
    multi-word labels such as ``macro avg`` or the legacy ``avg / total``
    stay in one cell.  Lines with fewer values than header columns (the
    ``accuracy`` line) are right-aligned with empty cells.
    """
    lines = [line for line in report_text.splitlines() if line.strip()]
    if not lines:
        return []
    header = lines[0].split()
    width = len(header)
    rows = [[''] + header]
    for line in lines[1:]:
        tokens = line.split()
        # Count numeric tokens from the right, always leaving at least one
        # token for the label (class names themselves may be numeric).
        n_values = 0
        while (n_values < min(width, len(tokens) - 1)
               and _is_number(tokens[-1 - n_values])):
            n_values += 1
        split_at = len(tokens) - n_values
        label = ' '.join(tokens[:split_at])
        rows.append([label] + [''] * (width - n_values) + tokens[split_at:])
    return rows
def extract(real,pre,testdata):
    """Export the test samples with a given (true, predicted) label pair.

    Reads ``CNNv1_predict.csv`` from ``path``, keeps the rows whose
    ``label`` equals *real* and whose ``pre`` equals *pre*, and uses their
    ``previousindex`` values to pull the matching rows out of *testdata*.

    Args:
        real: True label to select.
        pre: Predicted label to select.
        testdata: File name (relative to ``path``) of the test-data CSV
            whose row index corresponds to ``previousindex``.

    Side effects:
        Writes ``extract0(real<real>-pre<pre>).csv`` (the selected
        prediction rows) and ``extract(real<real>-pre<pre>).csv`` (the
        corresponding test-data rows) under ``path``.  When nothing
        matches, both files contain only their header line.

    Raises:
        KeyError: If a ``previousindex`` value is not in the test data.
    """
    df_predict = pd.read_csv(os.path.join(path, 'CNNv1_predict.csv'), encoding='gb2312')
    df_testdata = pd.read_csv(os.path.join(path, testdata), encoding='gb2312')

    # One vectorized selection replaces the row-by-row DataFrame.append
    # loop (append was removed in pandas 2.0 and was quadratic).
    wanted = (df_predict['label'] == real) & (df_predict['pre'] == pre)
    extract_data0 = df_predict[wanted].reset_index(drop=True)
    extract_data0.to_csv(os.path.join(path, 'extract0(real'+str(real)+'-pre'+str(pre)+').csv'), index=False)

    # previousindex holds row labels of the test-data frame.
    extract_data = df_testdata.loc[extract_data0['previousindex']].reset_index(drop=True)
    extract_data.to_csv(os.path.join(path, 'extract(real' + str(real) + '-pre' + str(pre) + ').csv'), index=False)
# Purpose: draw the confusion matrix as an image on the current figure.
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          cmap='Blues'):
    """Print the confusion matrix and draw it on the current figure.

    Args:
        cm: Square confusion matrix as a NumPy array (rows are true
            labels, columns are predicted labels).
        classes: Class names used as tick labels, in matrix order.
        normalize: When True, divide every row by its sum so each row
            shows fractions; rows whose sum is zero stay all-zero instead
            of turning into NaN.
        cmap: Matplotlib colormap used for the image.

    The function neither creates nor saves a figure; the caller does.
    """
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Skip the division for empty rows (a class with no true samples)
        # so they do not produce NaN cells and a NaN colour threshold.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=315, fontsize=10)
    plt.yticks(tick_marks, classes, fontsize=10)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if cm[i, j] > 0:  # zero cells are left without a label
            plt.text(j, i, format(cm[i, j], fmt),
                     horizontalalignment="center",
                     size=10,
                     color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout(pad=1.5)
    plt.ylabel('True label', fontdict=font)
    plt.xlabel('Predicted label', fontdict=font)
def _load_split(ev_file, csv_file, side=1500):
    """Load one data split: image bytes from *ev_file*, labels from *csv_file*.

    The ``.ev`` file is read as raw ``uint8`` bytes and cut into
    ``side * side``-byte samples, reshaped to ``(count, side, side, 1)``
    and scaled to ``float32`` in ``[0, 1]``.  The number of samples is
    printed.  Returns ``(images, label_frame)`` where ``label_frame`` is
    the CSV as a DataFrame.

    Raises:
        ValueError: If the file size is not a multiple of ``side * side``
            (raised by ``reshape``).
    """
    raw = np.fromfile(ev_file, dtype=np.uint8)
    # Integer division avoids a float round-trip on large byte counts.
    count = raw.shape[0] // (side * side)
    print(count)
    images = raw.reshape(count, side, side, 1).astype('float32') / 255
    return images, pd.read_csv(csv_file)


def main():
    """Load the train and test splits, record test indices, run ``LeNet5``.

    The test frame's row index is saved as column ``previousindex`` in
    ``reindex.csv`` under ``path`` (the location ``score`` reads it from),
    so predictions can later be traced back to the original rows.
    """
    print('Loading data...')
    input_train, y_train = _load_split(
        'F:/RTM/train/ev/Compare-1229train(+background)-nolol.ev',
        'F:/RTM/train/csv/Compare-1229train(+background)-nolol.csv')
    ytrain = y_train["label"].values

    test_x, test_y = _load_split(
        'F:/RTM/test/ev/Compare-1229train(+background)-nolol.ev',
        'F:/RTM/test/csv/Compare-1229train(+background)-nolol.csv')
    testy = test_y["label"].values

    indexdf = pd.DataFrame(test_y.index, columns=['previousindex'])
    indexdf.to_csv(os.path.join(path, 'reindex.csv'))

    LeNet5(input_train, test_x, ytrain, testy)


if __name__ == '__main__':
    main()
1
https://gitee.com/openkylin/cybersectookits.git
git@gitee.com:openkylin/cybersectookits.git
openkylin
cybersectookits
cybersectookits
master

搜索帮助