# Practice script: classifying the classic MNIST handwritten-digit data using TensorFlow's Layers API.
# In[]: Import libraries
import time
import numpy as np
import scipy.signal
import scipy.misc
import math #for ceil fn
import gzip
import struct
# Report the Python interpreter and the TensorFlow devices this run will use.
import sys
print("\n Python version is:", sys.version_info, "\n")

import tensorflow as tf
from tensorflow.python.client import device_lib

# Every device TensorFlow lists for this machine.
print("\n Local devices: \n")
local_devices = device_lib.list_local_devices()
print(local_devices)

# Name TensorFlow reports for the GPU device.
print("\n GPU device name: \n")
gpu_device_name = tf.test.gpu_device_name()
print(gpu_device_name)
# In[]:
import os

# Work from the data directory; every relative path below is resolved against it.
os.chdir(r"C:\Users\rf\Google Drive\Education\Python\Codes\PML\data") # no last slash

# Read a sample picture as an RGB array and print a few facts about it.
try:
    img = scipy.misc.imread('./example-image.png', mode='RGB')
except AttributeError:
    # NOTE(review): this also fires when scipy.misc has no `imread` at all,
    # which presumably is the case on recent SciPy releases - confirm.
    raise AttributeError(
        "scipy.misc.imread requires Python's image library PIL"
        " You can satisfy this requirement by installing the"
        " userfriendly fork PILLOW via `pip install pillow`.")

image_shape = img.shape
print('Image shape:', image_shape)
print('Number of channels:', image_shape[2])
print('Image data type:', img.dtype)
# A 2x2 pixel patch, all channels.
print(img[100:102, 100:102, :])
# In[]:
# # Implementing a deep convolutional neural network using TensorFlow
# ## The multilayer CNN architecture
# ## Loading and preprocessing the data
## Decompress every gzipped MNIST archive found in the working directory.
## Each output file sits next to its archive under the same name minus ".gz".
zipped_mnist = []
for entry in os.listdir('./'):
    if entry.endswith('ubyte.gz'):
        zipped_mnist.append(entry)

for archive_name in zipped_mnist:
    target_name = archive_name[:-3]  # drop the trailing ".gz"
    with gzip.GzipFile(archive_name, mode='rb') as decompressed:
        with open(target_name, 'wb') as outfile:
            payload = decompressed.read()
            outfile.write(payload)
def load_mnist(path, kind='train'):
    """Load MNIST-style images and labels from IDX files in `path`.

    Reads ``<kind>-labels-idx1-ubyte`` and ``<kind>-images-idx3-ubyte``.

    Parameters
    ----------
    path : str
        Directory holding the two uncompressed IDX files.
    kind : str
        File-name prefix of the data set, e.g. 'train' or 't10k'.

    Returns
    -------
    images : np.ndarray, dtype uint8, shape (n_labels, rows * cols)
        One flattened image per row; `rows` and `cols` come from the
        image file's header instead of being assumed to be 28 x 28.
    labels : np.ndarray, dtype uint8, shape (n_labels,)

    Raises
    ------
    ValueError
        From the reshape, if the pixel data does not hold exactly
        ``n_labels * rows * cols`` bytes.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # 8-byte big-endian header: magic number, item count (both unused).
        _magic, _n_labels = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # 16-byte big-endian header: magic number, image count, rows, cols.
        _magic, _n_images, rows, cols = struct.unpack('>IIII',
                                                      imgpath.read(16))
        pixels = np.fromfile(imgpath, dtype=np.uint8)
        # Reshaping by the label count keeps the labels/images consistency
        # check: a mismatch between the two files fails here.
        images = pixels.reshape(len(labels), rows * cols)

    return images, labels
# Read the 'train' and 't10k' files from the working directory.
X_data, y_data = load_mnist('./', kind='train')
print('Rows: %d, Columns: %d' % X_data.shape)

X_test, y_test = load_mnist('./', kind='t10k')
print('Rows: %d, Columns: %d' % X_test.shape)

# The first n_train rows are used for training, the rest for validation.
n_train = 50000
X_train, X_valid = X_data[:n_train, :], X_data[n_train:, :]
y_train, y_valid = y_data[:n_train], y_data[n_train:]

print('Training: ', X_train.shape, y_train.shape)
print('Validation: ', X_valid.shape, y_valid.shape)
print('Test Set: ', X_test.shape, y_test.shape)
# In[]:
# How large of a batch can I fit into my GPU memory? time to upgrade yet?
def batch_generator(X, y, batch_size=64, shuffle=False, batch_seed=None):
    """Yield successive ``(X_batch, y_batch)`` pairs of at most `batch_size` rows.

    With `shuffle` set, the rows of X and y are permuted together first,
    using a RandomState seeded with `batch_seed`. Pass a different seed per
    epoch (or None) to get a different order each time. The last batch is
    smaller when the row count is not a multiple of `batch_size`.
    """
    order = np.arange(y.shape[0])
    if shuffle:
        # Permute the row numbers, then reorder both arrays with them.
        np.random.RandomState(batch_seed).shuffle(order)
        X, y = X[order], y[order]

    n_rows = X.shape[0]
    for start in range(0, n_rows, batch_size):
        stop = start + batch_size
        yield X[start:stop, :], y[start:stop]
#batch_gen = batch_generator(X_train_centered, y_train, shuffle=True)
#print (type(batch_gen)) #class generator - no data in it, need to call from a loop with enumerate
#print (enumerate(batch_gen))
# Standardise all three splits with training-set statistics only:
# a per-column mean and a single overall standard deviation.
mean_vals = X_train.mean(axis=0)
std_val = X_train.std()
X_train_centered, X_valid_centered, X_test_centered = [
    (split - mean_vals) / std_val for split in (X_train, X_valid, X_test)
]

# The raw arrays are not needed any more; drop the names to free memory.
del X_data, y_data, X_train, X_valid, X_test
# In[]:
# ## Implementing a CNN in the TensorFlow layers API
# Moved some hyper parameter definition to the Session call
# Settings shared by the graph definition and the training loop.
graph_seed = 1        # seeds the initial weights, so a new session starts from the same values
learning_rate = 1e-4  # handed to the Adam optimizer
dropout_rate = 0.5    # `rate` of the dropout layer
shuffle = True        # passed through to batch_generator for the training batches
validation_set = 1    # any value other than None enables per-epoch validation scoring

np.random.seed(graph_seed)  # seed NumPy's global generator as well
# In[]:
#can give custom names only to layers
#define the graph
# Build the whole network in its own graph; the training session is bound to it.
# NOTE: keep the order in which ops are created. With a graph-level seed the
# per-op seeds depend on it, so reordering would change the initial weights.
g = tf.Graph()
with g.as_default():
    ## Graph-level seed: makes the random sequences generated by all ops
    ## repeatable across sessions, so runs start from the same weights.
    tf.set_random_seed(graph_seed)

    ## Placeholders for X and y. The training loop feeds them by tensor name
    ## ('tf_x:0', 'tf_y:0', 'is_train:0') - do not rename.
    tf_x = tf.placeholder(tf.float32,
                          shape=[None, 784],
                          name='tf_x')
    tf_y = tf.placeholder(tf.int32,
                          shape=[None],
                          name='tf_y')
    is_train = tf.placeholder(tf.bool,
                              shape=(),
                              name='is_train')

    ## Reshape each flat 784-pixel row to a 4D tensor:
    ## [batchsize, height, width, channels] = [-1, 28, 28, 1]
    tf_x_image = tf.reshape(tf_x, shape=[-1, 28, 28, 1],
                            name='Input_x_2dimages')
    print('tf_x_image:',tf_x_image.get_shape())

    ## One-hot encoding of the integer class labels (10 classes)
    tf_y_onehot = tf.one_hot(indices=tf_y, depth=10,
                             dtype=tf.float32,
                             name='Input_y_onehot')

    ## 1st layer: Conv_1 - 32 filters of 5x5; with padding=VALID (no zero
    ## padding) a 28x28 image becomes 24x24, 32 feature maps per observation
    h1 = tf.layers.conv2d(tf_x_image,
                          kernel_size=(5, 5),
                          filters=32,
                          activation=tf.nn.relu,
                          padding='VALID',  # 'same' would keep 28x28
                          name='Conv2d_h1')
    print('h1:',h1.get_shape())

    ## MaxPooling - halves the feature maps to 12x12, 32 per observation
    h1_pool = tf.layers.max_pooling2d(h1,
                                      pool_size=(2, 2),
                                      strides=(2, 2))
    print('h1_pool:',h1_pool.get_shape())

    ## 2nd layer: Conv_2 - 64 filters of 5x5, default padding -> 8x8 maps.
    ## As seen in TensorBoard the kernel is 5x5x32x64: each of the 64 outputs
    ## sums the convolutions over all 32 input maps, like multiple channels.
    h2 = tf.layers.conv2d(h1_pool, kernel_size=(5,5),
                          filters=64,
                          activation=tf.nn.relu,
                          name='Conv2d_h2')
    print('h2:',h2.get_shape())

    ## MaxPooling - halves the feature maps to 4x4, 64 per observation
    h2_pool = tf.layers.max_pooling2d(h2,
                                      pool_size=(2, 2),
                                      strides=(2, 2))
    print('h2_pool:',h2_pool.get_shape())

    ## 3rd layer: Fully Connected - flatten 4x4x64 to 1024 features per observation
    input_shape = h2_pool.get_shape().as_list()
    n_input_units = np.prod(input_shape[1:])  # everything except the batch axis
    h2_pool_flat = tf.reshape(h2_pool,
                              shape=[-1, n_input_units],
                              name='Flatten')
    print('h2_pool_flat:',h2_pool_flat.get_shape())

    ## Dense ReLU layer: 1024 inputs -> 1024 units
    h3 = tf.layers.dense(h2_pool_flat, 1024,
                         activation=tf.nn.relu,
                         name='Activations')
    print('h3:',h3.get_shape())

    ## Dropout on the 1024 activations to reduce overfitting; only active
    ## when the `is_train` placeholder is fed True
    h3_drop = tf.layers.dropout(h3,
                                rate=dropout_rate,
                                training=is_train)
    print('h3_drop:',h3_drop.get_shape())

    ## 4th layer: Fully Connected (linear activation) - 10 logits per observation
    h4 = tf.layers.dense(h3_drop, 10,
                         activation=None,
                         name="Output_layer")
    print('h4:',h4.get_shape())

    ## Prediction - fetched by name ('probabilities:0', 'labels:0') when scoring
    predictions = {
        'probabilities': tf.nn.softmax(h4,
                                       name='probabilities'),
        'labels': tf.cast(tf.argmax(h4, axis=1),
                          tf.int32, name='labels')}

    ## Loss Function and Optimization - fetched as 'cross_entropy_loss:0', do not rename
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=h4, labels=tf_y_onehot),
        name='cross_entropy_loss')

    ## Optimizer: keep the optimizer object and the training op under separate
    ## names; the op is run by its graph name 'train_op'.
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(cross_entropy_loss, name='train_op')

    ## Finding accuracy - fetched as 'accuracy:0'
    correct_predictions = tf.equal(
        predictions['labels'],
        tf_y, name='correct_preds')
    accuracy = tf.reduce_mean(
        tf.cast(correct_predictions, tf.float32),
        name='accuracy')

    ## Define the initializer and saver after the network so that they
    ## cover every variable created above.
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()
# In[32]: #start training
layer_start = time.time()

# Hyper parameters for this run
epochs = 20
batchsize = 3500  # pretty large

with tf.Session(graph=g) as sess:
    sess.run(init_op)

    # Main loop: one pass over the training batches per epoch.
    for epoch in range(1, epochs + 1):
        # batch_seed=None gives a new shuffle order every epoch. With a fixed
        # seed (and the fixed graph seed) every run would repeat exactly.
        batch_gen = batch_generator(X_train_centered, y_train,
                                    batch_size=batchsize,
                                    shuffle=shuffle, batch_seed=None)
        loss_sum = 0.0
        n_seen = 0
        for batch_x, batch_y in batch_gen:
            feed = {'tf_x:0': batch_x,
                    'tf_y:0': batch_y,
                    'is_train:0': True}  # enables dropout
            loss, _ = sess.run(['cross_entropy_loss:0', 'train_op'],
                               feed_dict=feed)
            # The fetched loss is a batch mean; weight it by the batch size so
            # the smaller last batch does not skew the epoch average.
            loss_sum += loss * len(batch_y)
            n_seen += len(batch_y)
        avg_loss = loss_sum / n_seen if n_seen else float('nan')
        print('Epoch %02d: Training Avg. Loss: %7.3f' % (epoch, avg_loss), end=' ')

        if validation_set is not None:
            # Score the validation set in batches as well, weighting each
            # batch accuracy by its size to get the exact overall accuracy.
            correct_sum = 0.0
            batch_gen_v = batch_generator(X_valid_centered, y_valid,
                                          batch_size=batchsize, shuffle=False)
            for batch_x, batch_y in batch_gen_v:
                feed = {'tf_x:0': batch_x,
                        'tf_y:0': batch_y,
                        'is_train:0': False}  # disables dropout
                valid_acc = sess.run('accuracy:0', feed_dict=feed)
                correct_sum += valid_acc * len(batch_y)
            valid_accuracy = correct_sum / len(y_valid) if len(y_valid) else float('nan')
            print('Validation Acc: %7.3f' % valid_accuracy)
        else:
            print()

    # Score the test set inside the training session; restoring a saved
    # session for this has not been made to work yet.
    pred_probs = []
    pred_labels = []
    batch_gen = batch_generator(X_test_centered, y_test,
                                batch_size=batchsize, shuffle=False)
    for batch_x, _ in batch_gen:  # the labels are not needed for prediction
        feed = {'tf_x:0': batch_x, 'is_train:0': False}
        # One run fetches both outputs, so the forward pass happens once per batch.
        probs, labels = sess.run(['probabilities:0', 'labels:0'],
                                 feed_dict=feed)
        pred_probs.extend(probs)  # extend keeps one entry per observation
        pred_labels.extend(labels)

layer_end = time.time()
layer_time = round(layer_end - layer_start)
print ( "\n Layers complete time after",epochs,"epochs:", layer_time, " secs or", round(layer_time / 60.) , "mins \n")
# Author's note: 10 epochs at batchsize=3500 took 17 secs;
# increasing the batch size reduces the training time of each epoch.
print('Test data set Accuracy: %.2f%%' % (100 * np.sum(y_test == np.asarray(pred_labels)) / len(y_test)))
# In[32]: Predictions from the test data set
# Compare truth and prediction for the first ten test observations.
print ("\nActual labels:")
print (y_test[:10])  # first 10 true classes
print ("\nPredicted labels:")
print (pred_labels[:10])  # first 10 predicted classes
print ("\nPrediction probabilities:")
# axis=1 takes the maximum over the classes of each observation, i.e. the
# probability of its predicted label. axis=0 would instead take the maximum
# over the ten observations, class by class.
print (np.max(pred_probs[:10], axis=1))
#OK, makes sense
### miscellaneous notes below
# In[32]: Understanding tensor shapes
#with tf.Session(graph=g) as sess:
# print('input_x_2dimages:',tf_x_image.get_shape())
#shows the original, not the actual
# In[32]: Save
#with tf.Session(graph=g) as sess:
# sess.run(init_op) #need to initialize the variables in the session first before can save them
# saver.save(sess, save_path='./model_one.ckpt', global_step=epochs) #number the checkpoint by global_step - just a name
#model_one.ckpt-1.index saved to current dir, suffix is the epoch
# In[32]: Load session
#with tf.Session(graph=g) as sess:
# saver.restore(sess, save_path='./model_one.ckpt-%d' % epoch) # find that checkpoint
# In[32]: The End
# The End