Feeding Our Own Data Set Into the CNN Model in TensorFlow

Posted By : Harshit Verma | 31-Dec-2018

Dataset

There are many datasets available on the internet. The Kaggle Dogs vs. Cats dataset consists of 25,000 color images of dogs and cats, which we use for training. Each image is a different size, with pixel intensities represented as [0, 255] integer values in the RGB color space.

Conversion to TFRecords

We need to convert the data to the native TFRecord format. Google provides a script for converting image data to the TFRecord format.
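Below is a minimal sketch (not Google's conversion script) of how a single labeled JPEG could be written as a tf.train.Example into a TFRecord file. The feature keys match those read back by parse_record later in this post; the file names, label values, and class text are placeholders.

import tensorflow as tf

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def write_example(writer, image_path, label, text):
    # Read the already-encoded JPEG bytes and wrap them in an Example.
    with open(image_path, 'rb') as f:
        encoded_jpeg = f.read()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': _bytes_feature(encoded_jpeg),
        'image/format': _bytes_feature(b'jpeg'),
        'image/class/label': _int64_feature(label),
        'image/class/text': _bytes_feature(text.encode('utf-8')),
    }))
    writer.write(example.SerializeToString())

# Example usage (placeholder file names):
# with tf.python_io.TFRecordWriter('train-00000-of-00002') as writer:
#     write_example(writer, 'dog.0.jpg', 1, 'dog')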


Convolutional Neural Network

We use three types of layers to build convolutional neural network architectures: the convolutional layer, the pooling layer, and the fully-connected layer. We stack these layers to form the full ConvNet architecture.

Dense Layer
We want to add a dense layer (with 1,024 neurons and ReLU activation) to our CNN to perform classification on the features extracted by the convolution/pooling layers.

Logits Layer
The dense layer leaves us with 1,024 real numbers, which we feed through a final logits layer (one unit per class) and then to a softmax unit.

_DEFAULT_IMAGE_SIZE = 252
_NUM_CHANNELS = 3
_NUM_CLASSES = 2
 
"""Model function for CNN."""
def cnn_model_fn(features, labels, mode):
    
    # Input Layer
    input_layer = tf.reshape(features["image"], [-1, _DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, 3])
 
    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
 
    # Pooling Layer #1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
 
    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
 
    # Dense Layer
    # Two 2x2 max-pooling layers reduce the 252x252 input to 63x63,
    # so pool2 has shape [batch_size, 63, 63, 64].
    pool2_flat = tf.reshape(pool2, [-1, 63 * 63 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(
        inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
 
    # Logits Layer
    logits = tf.layers.dense(inputs=dropout, units=2)
 
    ...
We can derive the probabilities from our logits layer by applying a softmax activation using tf.nn.softmax:
 
 
def cnn_model_fn(features, labels, mode):
    ....
    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    ...
 
 
Calculate Loss
The loss measures how closely the model's predictions match the target classes.
 
#Calculate Loss (for both TRAIN and EVAL modes)
onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=2)
loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)
 
Training Operation
 
if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
 
Add evaluation metrics
 
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
 
 
Load Training and Test Data
 
def get_file_lists(data_dir):
    import glob

    train_list = glob.glob(data_dir + '/' + 'train-*')
    valid_list = glob.glob(data_dir + '/' + 'validation-*')
    if len(train_list) == 0 and \
                    len(valid_list) == 0:
        raise IOError('No files found at specified path!')
    return train_list, valid_list

def parse_record(raw_record, is_training):
    """Parse an ImageNet record from `value`."""
    keys_to_features = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
            tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text':
            tf.FixedLenFeature([], dtype=tf.string, default_value=''),
    }

    parsed = tf.parse_single_example(raw_record, keys_to_features)

    image = tf.image.decode_image(
        tf.reshape(parsed['image/encoded'], shape=[]),
        _NUM_CHANNELS)

    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

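    # `vgg_preprocessing` comes from the TensorFlow models repository
    # (research/slim/preprocessing); preprocess_image crops/resizes the
    # image and subtracts the per-channel ImageNet means.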
    image = vgg_preprocessing.preprocess_image(
        image=image,
        output_height=_DEFAULT_IMAGE_SIZE,
        output_width=_DEFAULT_IMAGE_SIZE,
        is_training=is_training)

    label = tf.cast(
        tf.reshape(parsed['image/class/label'], shape=[]),
        dtype=tf.int32)

    return {"image": image}, label


Train a Model with a Different Image Size

The simplest solution is to artificially resize your images to 252×252 pixels.
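If your source images are not already 252×252, a minimal sketch of the resize step (placed inside parse_record right after the image is decoded, as an alternative to the VGG preprocessing shown above) could look like this:

    image = tf.image.resize_images(
        image, [_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE])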

Create input functions

def input_fn(is_training, filenames, batch_size, num_epochs=1, num_parallel_calls=1):
    dataset = tf.data.TFRecordDataset(filenames)

    if is_training:
        dataset = dataset.shuffle(buffer_size=1500)

    dataset = dataset.map(lambda value: parse_record(value, is_training),
                          num_parallel_calls=num_parallel_calls)
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(num_epochs)
    iterator = dataset.make_one_shot_iterator()

    features, labels = iterator.get_next()

    return features, labels


def train_input_fn(file_path):
    return input_fn(True, file_path, 100, None, 10)


def validation_input_fn(file_path):
    return input_fn(False, file_path, 50, 1, 1)
 
 
Create the Estimator
 
The Estimator is a TensorFlow class for performing high-level model training, evaluation, and inference.
 
classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir="/tmp/convnet_model")
 
 
Set Up a Logging Hook
 
# Set up logging for predictions
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)
 
 
Train the Model
 
classifier.train(input_fn=lambda: train_input_fn(train_list), steps=10, hooks=[logging_hook])
 
Evaluate the Model
 
evaluation = classifier.evaluate(input_fn=lambda: validation_input_fn(valid_list))
 
Run the Model
 
Training a CNN is quite computationally intensive. The estimated completion time of the Python script will vary depending on your processor. To train more quickly, you can decrease the number of steps passed to train(), but note that this will affect accuracy.
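Putting the pieces together, here is a minimal sketch of a driver script that simply wires up the functions defined above; the data directory is a placeholder path.

def main(_):
    # Locate the TFRecord shards produced by the conversion step.
    train_list, valid_list = get_file_lists('/tmp/dog_cat_data')  # placeholder path

    # Create the Estimator backed by our model function.
    classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/convnet_model")

    # Log softmax probabilities every 50 steps during training.
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train, then evaluate on the validation shards.
    classifier.train(input_fn=lambda: train_input_fn(train_list),
                     steps=10, hooks=[logging_hook])
    evaluation = classifier.evaluate(
        input_fn=lambda: validation_input_fn(valid_list))
    print(evaluation)


if __name__ == "__main__":
    tf.app.run()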
 
 

About Author

Harshit Verma

Harshit is a bright Web Developer with expertise in Java, the Spring framework, and the ORM tool Hibernate.
