# 基于tensorflow的多层感知机的代码实现
# coding=utf-8
# author: Shuigs18
# date: 2021-04-07

# 基于tensorflow的多层感知机的实现
# 三层(输入、隐藏、输出)MLP + K-fold + weight decay(L2正则化) + dropout
# Relu(隐藏) + softmax(输出)
# 数据集:fashion—mnist
# 梯度计算利用tensorflow
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
import numpy as np
import random
import time
from tensorflow.keras.datasets import fashion_mnist
# 读取数据并处理(测试集验证集)
# 定义模型参数
# 定义激活函数Relu和softmax
# 定义网络(dropout实现)
# 定义损失函数(加L2正则项 weight decay实现)
# K-fold 函数
# 先 k-fold 确定训练集和验证集
# 然后在将训练集生成SGD训练的迭代器
# train函数 (输入包含训练集迭代器和验证集)
# 小批量梯度下降
# 返回 fold 0,1,2,3,4,5 训练集验证集的误差
# predict函数 生成结果

# 数据处理
(X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()
batch_size = 256
X_train = tf.cast(X_train, tf.float32)
X_test = tf.cast(X_test, tf.float32)
X_train = X_train / 255.0 # 颜色的深浅没有关系
X_test = X_test / 255.0
# 划分批次
# train_iter = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
1
2
3
4
5
6
# 定义模型参数(一层隐藏层)
dim_inputs, dim_hiddens, dim_outputs = 784, 256, 10
W1 = tf.Variable(tf.random.normal(shape=(dim_inputs, dim_hiddens), mean=0.0, stddev=0.01, dtype=tf.float32))
b1 = tf.Variable(tf.zeros(dim_hiddens, dtype=tf.float32))
W2 = tf.Variable(tf.random.normal(shape=(dim_hiddens, dim_outputs), mean=0.0, stddev=0.01, dtype=tf.float32))
b2 = tf.Variable(tf.random.normal([dim_outputs], mean=0.0, stddev=0.01, dtype=tf.float32))
1
2
3
4
5
6
# 定义激活函数 ReLu softmax
def ReLu(X):
return tf.math.maximum(X, 0)

def softmax(X):
return tf.exp(X) / tf.reduce_sum(tf.math.exp(X), axis=1, keepdims=True)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# dropout(H, drop_prob)
# 网络net()
def dropout(H, drop_prob):
assert 0 <= drop_prob <= 1
keep_prob = 1- drop_prob
if keep_prob == 0:
return tf.zeros_like(H)
mask = tf.random.uniform(shape=H.shape, minval=0, maxval=1) < keep_prob
return tf.cast(mask, dtype=tf.float32) * tf.cast(H, dtype=tf.float32) / keep_prob

# 定义整个网络
drop_prob1 = 0.2
def net(X, training=False):
X = tf.reshape(X, shape=(-1, dim_inputs))
H1 = ReLu(tf.matmul(X, W1) + b1)
if training:
H1 = drop_out(H, drop_prob1)
return softmax(tf.matmul(H1, W2) + b2)
1
2
3
4
5
6
# 定义损失函数 交叉熵 L2正则项
def loss_cross_entropy(y_true, y_pred):
return tf.losses.sparse_categorical_crossentropy(y_true, y_pred)

def L2_penalty(W):
return tf.reduce_sum(W ** 2) / 2.0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 定义get_K_fold_data函数
def get_K_fold_data(k, i, X, Y):
fold_size = X.shape[0] // k
X_train, Y_train = None, None
for j in range(k):
idx = slice(j * fold_size, (j + 1) * fold_size)
X_part, Y_part = X[idx, :], Y[idx]
if j == i:
X_valid, Y_valid = X_part, Y_part
elif X_train is None:
X_train, Y_train = X_part, Y_part
else:
X_train = tf.concat([X_train, X_part], axis=0)
Y_train = tf.concat([Y_train, Y_part], axis=0)
return X_train, Y_train, X_valid, Y_valid
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# 定义训练函数
def train(net, train_iter, X_valid, Y_valid, loss, num_epochs, batch_size, params=None, learning_rate=None):
train_loss_sum, valid_loss_sum = 0.0, 0.0
for epoch in range(num_epochs):
train_loss, valid_loss, n = 0.0, 0.0, 0
for X_train, Y_train in train_iter:
with tf.GradientTape() as tape:
Y_pred = net(X_train)
l = loss(Y_train, Y_pred)
# 计算梯度
grads = tape.gradient(l, params)
# 创建一个优化器
opt = tf.keras.optimizers.SGD(learning_rate = learning_rate)
# 梯度下降更新参数(批量梯度下降)
opt.apply_gradients(zip([grad / batch_size for grad in grads], params))
# 更新训练集损失值
train_loss += l.numpy().sum()
n += Y_train.shape[0]
valid_loss += (loss(Y_valid, net(X_valid)).numpy().sum() / Y_valid.shape[0])
train_loss /= n
train_loss_sum += train_loss
valid_loss_sum += valid_loss
train_loss_sum /= num_epochs
valid_loss_sum /= num_epochs

return params, train_loss_sum, valid_loss_sum

def k_fold(k, net, X_train, Y_train, num_epochs,
batch_size, loss_cross_entropy, params=None, learning_rate=None):
start_time = time.time()
for i in range(k):
data = get_K_fold_data(k, i, X_train, Y_train)
train_iter = tf.data.Dataset.from_tensor_slices((data[0], data[1])).batch(batch_size)
X_valid = data[2]
Y_valid = data[3]
params, train_loss, valid_loss = train(net, train_iter, X_valid, Y_valid, loss_cross_entropy, num_epochs, batch_size, params, learning_rate)
print("fold %d: train loss %f, valid loss %f" % (i, train_loss, valid_loss))
end_time = time.time()
print('总用时:%f' % (start_time - end_time))
return params
1
2
3
4
5
# 预测函数 predict()
def predict(net, params, X_test):
Y_pred = net(X_test)
result = tf.argmax(Y_pred, axis=1)
return result
1
2
3
4
5
6
7
8
9
10
11
params = [W1, b1, W2, b2]
num_epochs = 10
params = k_fold(5, net, X_train, Y_train, num_epochs, batch_size, loss_cross_entropy, params, learning_rate=0.1)
'''
fold 0: train loss 0.169400, valid loss 0.169741
fold 1: train loss 0.156253, valid loss 0.167162
fold 2: train loss 0.147749, valid loss 0.159257
fold 3: train loss 0.139382, valid loss 0.157192
fold 4: train loss 0.130207, valid loss 0.149647
总用时:-114.377255
'''
1
2
3
4
5
6
7
8
9
result = predict(net, params, X_test)
'''
<tf.Tensor: shape=(100,), dtype=int64, numpy=
array([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 5, 3, 4, 1, 2, 2, 8, 0, 2, 5,
7, 5, 1, 4, 6, 0, 9, 6, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 0, 1, 6, 7,
6, 7, 2, 1, 2, 6, 4, 2, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1,
3, 3, 7, 8, 7, 0, 2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2,
0, 2, 5, 3, 6, 7, 1, 8, 0, 1, 2, 2])>
'''
# Reference