This example notebook shows how to implement and train a fully connected neural network with TensorFlow/Keras on the MNIST handwritten-digits dataset.
# Notebook magic: select TensorFlow 2.x on Google Colab.
%tensorflow_version 2.x
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
# Notebook magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# Download/load MNIST: 60k training and 10k test grayscale 28x28 digit images
# (shapes confirmed by the reshape(60000, 28*28) / reshape(10000, 28*28) below).
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Notebook cell output: inspect the array shapes.
x_train.shape, y_train.shape, x_test.shape, y_test.shape
def show_train_imgs(n=8, m=5):
    """Display m rows of n randomly sampled MNIST training digits.

    Each image is titled with its integer label. Indices are drawn with
    np.random.randint (with replacement), so duplicates are possible.
    Relies on the module-level x_train / y_train arrays.

    Args:
        n: number of images per row.
        m: number of rows (one figure per row).
    """
    for _ in range(m):
        for col in range(n):
            idx = np.random.randint(len(y_train))
            # (1, n, col+1): one row of n axes. The original built the spec by
            # string concatenation, which breaks for n > 9, and called
            # plt.show() only once, so the m rows overwrote each other.
            plt.subplot(1, n, col + 1)
            plt.imshow(x_train[idx], cmap='gray')
            plt.title(y_train[idx], fontsize=30)
            plt.axis('off')
        # Render this row as its own figure so all m rows are visible.
        plt.show()
# Wide, short figures suit a single row of digit thumbnails.
plt.rcParams['figure.figsize'] = (15, 5)
show_train_imgs(8)
# Cell output: raw pixel range before scaling (uint8 data, so 0..255).
x_train.min(), x_train.max()
# Flatten each 28x28 image to a 784-vector and scale pixels into [0, 1].
x_train = x_train.reshape(60000, 28*28)/255
x_test = x_test.reshape(10000, 28*28)/255
# Cell output: confirm the new shapes and the scaled value range.
x_train.shape, x_test.shape, x_train.min(), x_train.max()
# Cell output: the first five integer class labels.
y_train[:5]
# One-hot encode the 0-9 labels for the categorical cross-entropy loss
# used in model.compile below.
y_train_oh = keras.utils.to_categorical(y_train)
y_test_oh = keras.utils.to_categorical(y_test)
# Cell output: the same five labels, one-hot encoded.
y_train_oh[:5]
The Sequential API builds the model by stacking layers one after another.
# Fully connected classifier: 784 -> 784 -> 512 -> 256 -> 128 -> 10.
# ReLU hidden layers; softmax output gives per-class probabilities.
model = keras.Sequential()
model.add(keras.layers.Dense(784, activation='relu', input_dim=784))
model.add(keras.layers.Dense(512, activation='relu'))
model.add(keras.layers.Dense(256, activation='relu'))
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
model.summary()
# Cell output: hand-computed parameter counts (weights + biases per layer)
# to cross-check against model.summary().
784*784+784, 784*512+512, 512*256+256, 256*128+128, 128*10+10
# Categorical cross-entropy matches the one-hot labels; accuracy is tracked
# per epoch. NOTE: the deprecated `lr` keyword was replaced with
# `learning_rate` (the `lr` alias is removed in newer Keras releases).
model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.SGD(learning_rate=1e-2), metrics=['accuracy'])
If training feels much slower than expected, activate the GPU on Google Colab via Runtime $\to$ Change runtime type $\to$ Hardware acceleration $\to$ GPU. During training the most important summary metrics are shown. You can also save the training history.
# Train for 15 epochs with mini-batches of 64, evaluating on the test set
# after every epoch; `history` records per-epoch loss/accuracy curves.
history = model.fit(x=x_train, y=y_train_oh, batch_size=64, epochs=15, validation_data=(x_test, y_test_oh))
# Learning curves: training vs. validation loss per epoch.
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.xlabel('epochs', fontsize=15)
plt.legend(fontsize=20)
plt.show()
# Learning curves: training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='val accuracy')
plt.xlabel('epochs', fontsize=15)
plt.legend(fontsize=20)
plt.show()
Let's check the predictions to see where the model goes wrong. Erroneous predictions are highlighted with a red dot. From the learning curves above we can also see that the model is not yet fully trained; the results are still improving.
def show_predictions(n=5, m=5):
    """Show m rows of n consecutive test digits with the model's predictions.

    Each image is titled 'pred: d, prob: p% / true: t'; a wrong prediction
    is flagged with a red dot at the image centre. Each row starts at a
    random offset into the test set. Relies on the module-level `model`,
    `x_test` and `y_test`.

    Args:
        n: number of consecutive images per row.
        m: number of rows (one figure per row).
    """
    for _ in range(m):
        idx_start = np.random.randint(len(x_test) - n)
        # The original hard-coded a slice length of 5 here, so n > 5 raised
        # IndexError and n < 5 predicted unused items; slice n items instead.
        preds = model.predict(x_test[idx_start:idx_start+n])
        true_labels = y_test[idx_start:idx_start+n]
        for i in range(n):
            # Numeric (1, n, i+1) form, unlike the original string-built
            # spec, also works for n > 9.
            plt.subplot(1, n, i + 1)
            predstr = 'pred: ' + str(preds[i].argmax()) + ', prob: ' + str(int(np.round(preds[i].max()*100,0))) + '%'
            plt.title(predstr + ' / true: ' + str(true_labels[i]),fontsize=10)
            plt.imshow(x_test[idx_start+i].reshape(28, 28)*255, cmap='gray')
            if(preds[i].argmax() != true_labels[i]):
                plt.scatter([14], [14], s=500, c='r')
            plt.axis('off')
        plt.show()
show_predictions(m=20)