Keras samples:LeNet-5 on cifar10 dataset

Keras Samples: LeNet-5 on cifar10 dataset

从keras导入cifar10数据集

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def load_cifar10(num_training=49000, num_validation=1000, num_test=1000):
    """Load CIFAR-10 and return normalized train/validation/test splits.

    The first ``num_training`` training samples form the training split, the
    following ``num_validation`` training samples form the validation split,
    and the first ``num_test`` test samples form the test split.  Images are
    standardized with the mean and standard deviation of the training split,
    computed over every axis except the last one.  Labels are one-hot encoded
    over 10 classes.

    Returns:
        The tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    # Fetch the CIFAR-10 dataset through the Keras dataset loader.
    (train_images, train_labels), (test_images, test_labels) = \
        keras.datasets.cifar10.load_data()
    train_images = np.asarray(train_images, dtype=np.float32)
    train_labels = np.asarray(train_labels, dtype=np.int32).flatten()
    test_images = np.asarray(test_images, dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32).flatten()

    # Subsample the data: validation samples come right after the training ones.
    val_idx = range(num_training, num_training + num_validation)
    train_idx = range(num_training)
    test_idx = range(num_test)
    X_val, y_val = train_images[val_idx], train_labels[val_idx]
    X_train, y_train = train_images[train_idx], train_labels[train_idx]
    X_test, y_test = test_images[test_idx], test_labels[test_idx]

    # Normalize the data: subtract the mean pixel and divide by std.  Only
    # training-split statistics are used, for all three splits.
    reduce_axes = (0, 1, 2)
    mean_pixel = X_train.mean(axis=reduce_axes, keepdims=True)
    std_pixel = X_train.std(axis=reduce_axes, keepdims=True)
    X_train, X_val, X_test = (
        (split - mean_pixel) / std_pixel for split in (X_train, X_val, X_test)
    )

    # One-hot encode the labels.
    y_train, y_val, y_test = (
        keras.utils.to_categorical(labels, 10)
        for labels in (y_train, y_val, y_test)
    )

    return X_train, y_train, X_val, y_val, X_test, y_test
"""
Train data shape: (49000, 32, 32, 3)
Train labels shape: (49000, 10) float32
Validation data shape: (1000, 32, 32, 3)
Validation labels shape: (1000, 10)
Test data shape: (1000, 32, 32, 3)
Test labels shape: (1000, 10)
"""

labels需要进行one-hot因为预测采用softmax函数,需要每个标签的shape为(num_classes,)。

LeNet-5模型

LeNet-5,一个7层的卷积神经网络,被很多银行用于识别支票上的手写数字。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def simple_model(input_shape, num_classes=10):
    """Build the LeNet-5 style convolutional classifier.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``keras.Model`` from the input tensor to the softmax
        output.
    """
    # Define the input placeholder as a tensor with shape input_shape.
    inputs = layers.Input(input_shape)

    # Layers are instantiated in the same order in which they are applied.
    stack = [
        layers.Conv2D(filters=20, kernel_size=5, padding='same', activation='relu'),
        layers.MaxPool2D(),
        layers.Conv2D(filters=50, kernel_size=5, padding='same', activation='relu'),
        layers.MaxPool2D(),
        layers.Flatten(),
        layers.Dense(500, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]

    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)

    return keras.Model(inputs, outputs)

# Create a model instance.
model = simple_model((32, 32, 3), 10)
# Compile the model.  The learning rate is passed positionally: the keyword
# was renamed from `lr` to `learning_rate` across Keras releases, while the
# learning rate stays the first positional parameter of SGD in both.
model.compile(optimizer=keras.optimizers.SGD(1e-2, momentum=0.9, nesterov=True),
              loss=keras.losses.categorical_crossentropy, metrics=["accuracy"])
# Train the model, reporting validation loss/accuracy after every epoch.
history = model.fit(X_train, y_train, epochs=15, batch_size=64, verbose=2,
                    validation_data=(X_val, y_val))
"""
Train Loss = 0.0719343089648
Train Accuracy = 0.976734693878
Val Loss = 1.77683620453
Val Accuracy = 0.708
"""

通过训练集和验证集的差距可以看出,模型对训练集过拟合了。

训练曲线如下:


可以看到训练中,虽然train的loss曲线逐渐下降,但是val的loss曲线却有上升。起初以为有可能是学习率的问题。但是将学习率降为1e-3后,15个epoch后的结果如下:

貌似不错?其实train的loss曲线还比较直,还没有训练好,再把训练时间拉长,45个epoch后的结果如下:

所以,val的loss曲线上升,并不是学习率的问题,而是模型过拟合产生的!

LeNet-5 + Batch Normalization

为减少模型的过拟合,希望加入各种正则化手段。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def simple_model(input_shape, num_classes=10):
    """Build the LeNet-5 style classifier with batch normalization.

    Each convolution is followed by batch normalization over axis 3 and a
    max-pooling layer; the hidden dense layer is batch-normalized (without a
    learned scale) before its ReLU activation.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``keras.Model`` from the input tensor to the softmax
        output.
    """
    # Define the input placeholder as a tensor with shape input_shape.
    inputs = layers.Input(input_shape)

    # Layers are instantiated in the same order in which they are applied.
    stack = [
        layers.Conv2D(filters=20, kernel_size=5, padding='same', activation='relu'),
        layers.BatchNormalization(axis=3),
        layers.MaxPool2D(strides=(2, 2)),
        layers.Conv2D(filters=50, kernel_size=5, padding='same', activation='relu'),
        layers.BatchNormalization(axis=3),
        layers.MaxPool2D(strides=(2, 2)),
        layers.Flatten(),
        layers.Dense(500),
        layers.BatchNormalization(scale=False),
        layers.Activation(activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]

    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)

    return keras.Model(inputs, outputs)
"""
Train Loss = 0.00337406960365
Train Accuracy = 1.0
Val Loss = 0.965107679367
Val Accuracy = 0.767
"""

加上BN之后有一点好转。尝试把batch size调小一点,以增加训练的随机性,但无明显效果。


LeNet-5 + BN + Dropout

只有两个全连接层,所以只能在它们之间放置一个Dropout层。试着把rate调得高一点(rate是失活神经元的比例)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def simple_model(input_shape, num_classes=10):
    """Build the LeNet-5 style classifier with batch normalization and dropout.

    Each convolution is followed by batch normalization over axis 3 and a
    max-pooling layer.  The hidden dense layer is batch-normalized (without a
    learned scale), passed through ReLU, and then through dropout with
    ``rate=0.7`` before the softmax output layer.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``keras.Model`` from the input tensor to the softmax
        output.
    """
    # Define the input placeholder as a tensor with shape input_shape.
    inputs = layers.Input(input_shape)

    # Layers are instantiated in the same order in which they are applied.
    stack = [
        layers.Conv2D(filters=20, kernel_size=5, padding='same', activation='relu'),
        layers.BatchNormalization(axis=3),
        layers.MaxPool2D(strides=(2, 2)),
        layers.Conv2D(filters=50, kernel_size=5, padding='same', activation='relu'),
        layers.BatchNormalization(axis=3),
        layers.MaxPool2D(strides=(2, 2)),
        layers.Flatten(),
        layers.Dense(500),
        layers.BatchNormalization(scale=False),
        layers.Activation(activation='relu'),
        layers.Dropout(rate=0.7),
        layers.Dense(num_classes, activation='softmax'),
    ]

    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)

    return keras.Model(inputs, outputs)
"""
Train Loss = 0.36407692194
Train Accuracy = 0.879408163265
Val Loss = 0.750349837303
Val Accuracy = 0.76
"""


结果还是不尽如人意,后来怀疑是不是网络结构太复杂了?尝试减少层数,结果train的结果反而也降下来了。

把epoch增加到50:



模型还是很容易拟合训练集,10个epoch之后,训练对val的损失或准确度的贡献就不大了。

1
2
3
4
5
6
"""
Train Loss = 0.029131395354
Train Accuracy = 0.997387755102
Val Loss = 0.905600978374
Val Accuracy = 0.775
"""

完整代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras import layers


def load_cifar10(num_training=49000, num_validation=1000, num_test=1000):
    """Load CIFAR-10 and return normalized train/validation/test splits.

    The first ``num_training`` training samples form the training split, the
    following ``num_validation`` training samples form the validation split,
    and the first ``num_test`` test samples form the test split.  Images are
    standardized with the mean and standard deviation of the training split,
    computed over every axis except the last one.  Labels are one-hot encoded
    over 10 classes.

    Returns:
        The tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    # Fetch the CIFAR-10 dataset through the Keras dataset loader.
    (train_images, train_labels), (test_images, test_labels) = \
        keras.datasets.cifar10.load_data()
    train_images = np.asarray(train_images, dtype=np.float32)
    train_labels = np.asarray(train_labels, dtype=np.int32).flatten()
    test_images = np.asarray(test_images, dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32).flatten()

    # Subsample the data: validation samples come right after the training ones.
    val_idx = range(num_training, num_training + num_validation)
    train_idx = range(num_training)
    test_idx = range(num_test)
    X_val, y_val = train_images[val_idx], train_labels[val_idx]
    X_train, y_train = train_images[train_idx], train_labels[train_idx]
    X_test, y_test = test_images[test_idx], test_labels[test_idx]

    # Normalize the data: subtract the mean pixel and divide by std.  Only
    # training-split statistics are used, for all three splits.
    reduce_axes = (0, 1, 2)
    mean_pixel = X_train.mean(axis=reduce_axes, keepdims=True)
    std_pixel = X_train.std(axis=reduce_axes, keepdims=True)
    X_train, X_val, X_test = (
        (split - mean_pixel) / std_pixel for split in (X_train, X_val, X_test)
    )

    # One-hot encode the labels.
    y_train, y_val, y_test = (
        keras.utils.to_categorical(labels, 10)
        for labels in (y_train, y_val, y_test)
    )

    return X_train, y_train, X_val, y_val, X_test, y_test


# Load the dataset splits, then report the shape of each one (and the dtype
# of the training labels).
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()
for caption, details in (
    ('Train data shape: ', (X_train.shape,)),
    ('Train labels shape: ', (y_train.shape, y_train.dtype)),
    ('Validation data shape: ', (X_val.shape,)),
    ('Validation labels shape: ', (y_val.shape,)),
    ('Test data shape: ', (X_test.shape,)),
    ('Test labels shape: ', (y_test.shape,)),
):
    print(caption, *details)


def simple_model(input_shape, num_classes=10):
    """Build the reduced LeNet-5 style classifier with batch norm and dropout.

    Two 5x5 convolutions (16 and 32 filters) are each followed by batch
    normalization over axis 3 and a max-pooling layer.  The hidden dense
    layer is batch-normalized (without a learned scale), passed through ReLU,
    and then through dropout with ``rate=0.7`` before the softmax output.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``keras.Model`` from the input tensor to the softmax
        output.
    """
    # Define the input placeholder as a tensor with shape input_shape.
    inputs = layers.Input(input_shape)

    # Layers are instantiated in the same order in which they are applied.
    stack = [
        layers.Conv2D(filters=16, kernel_size=5, padding='same', activation='relu'),
        layers.BatchNormalization(axis=3),
        layers.MaxPool2D(strides=(2, 2)),
        layers.Conv2D(filters=32, kernel_size=5, padding='same', activation='relu'),
        layers.BatchNormalization(axis=3),
        layers.MaxPool2D(strides=(2, 2)),
        layers.Flatten(),
        layers.Dense(500),
        layers.BatchNormalization(scale=False),
        layers.Activation(activation='relu'),
        layers.Dropout(rate=0.7),
        layers.Dense(num_classes, activation='softmax'),
    ]

    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)

    return keras.Model(inputs, outputs)


# Build and compile the model.  The learning rate is passed positionally: the
# keyword was renamed from `lr` to `learning_rate` across Keras releases,
# while the learning rate stays the first positional parameter of SGD in both.
model = simple_model((32, 32, 3), 10)
model.compile(optimizer=keras.optimizers.SGD(1e-2, momentum=0.9, nesterov=True),
              loss=keras.losses.categorical_crossentropy, metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=50, batch_size=64, verbose=2,
                    validation_data=(X_val, y_val))

# Plot the training and validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train","val"],loc="upper left")
plt.show()

# Plot the training and validation accuracy per epoch.  Depending on the
# Keras release, the history records the metric as 'acc' or as 'accuracy',
# so use whichever key is present instead of hard-coding one.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.figure()
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title("model acc")
plt.ylabel("acc")
plt.xlabel("epoch")
plt.legend(["train","val"],loc="upper left")
plt.show()

# Final loss/accuracy on the training split; evaluate() returns
# [loss, accuracy] here because the model was compiled with one metric.
preds = model.evaluate(x=X_train, y=y_train)
print()
print("Train Loss = " + str(preds[0]))
print("Train Accuracy = " + str(preds[1]))

# Final loss/accuracy on the validation split.
preds = model.evaluate(x=X_val, y=y_val)
print()
print("Val Loss = " + str(preds[0]))
print("Val Accuracy = " + str(preds[1]))
-------------The End-------------