sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.29   R6_2.5.1        jsonlite_1.7.2  magrittr_2.0.1 
##  [5] evaluate_0.14   stringi_1.7.6   rlang_1.0.1     cli_3.1.0      
##  [9] rstudioapi_0.13 jquerylib_0.1.4 bslib_0.3.1     rmarkdown_2.11 
## [13] tools_3.6.0     stringr_1.4.0   xfun_0.29       yaml_2.2.1     
## [17] fastmap_1.1.0   compiler_3.6.0  htmltools_0.5.2 knitr_1.37     
## [21] sass_0.4.0

Source: https://tensorflow.rstudio.com/keras/articles/examples/mnist_cnn.html

In this example, we train a convolutional neural network (CNN) on the MNIST data set. It achieves a test accuracy of 99.16% after 12 epochs.

Prepare data

For a CNN, instead of vectorizing the images, we keep their 2D structure.
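
As a quick contrast (a sketch, not executed here), a dense network would flatten each image into a vector, while the CNN keeps the spatial grid plus a channel axis:

# Dense network: flatten each 28 x 28 image to a length-784 vector
# x <- array_reshape(x, c(nrow(x), 784))
# CNN: keep height, width, and a single grayscale channel
# x <- array_reshape(x, c(nrow(x), 28, 28, 1))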

library(keras)

# Data Preparation -----------------------------------------------------

batch_size <- 128
num_classes <- 10
epochs <- 12

# Input image dimensions
img_rows <- 28
img_cols <- 28

# The data, shuffled and split between train and test sets
mnist <- dataset_mnist()
## Loaded Tensorflow version 2.6.2
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y

# Redefine dimension of train/test inputs
x_train <- array_reshape(x_train, c(nrow(x_train), img_rows, img_cols, 1))
x_test <- array_reshape(x_test, c(nrow(x_test), img_rows, img_cols, 1))
input_shape <- c(img_rows, img_cols, 1)

# Rescale grayscale pixel values from [0, 255] into the [0, 1] range
x_train <- x_train / 255
x_test <- x_test / 255

cat('x_train_shape:', dim(x_train), '\n')
## x_train_shape: 60000 28 28 1
cat(nrow(x_train), 'train samples\n')
## 60000 train samples
cat(nrow(x_test), 'test samples\n')
## 10000 test samples
# Convert class vectors to binary class matrices
y_train <- to_categorical(y_train, num_classes)
y_test <- to_categorical(y_test, num_classes)
# Display the first training digit; reversing the row order (28:1) and
# transposing the matrix orients the image upright for image()
image(t(x_train[1, 28:1, , ]), useRaster = TRUE, axes = FALSE,
      col = grey(seq(0, 1, length = 256)))

y_train[1, ]
##  [1] 0 0 0 0 0 1 0 0 0 0

Define model

We define a sequential model with two convolutional layers, max pooling, dropout, a dense hidden layer, and a softmax output:

model <- keras_model_sequential()
model %>%
  layer_conv_2d(filters = 32, kernel_size = c(3,3), activation = 'relu',
                input_shape = input_shape) %>% 
  layer_conv_2d(filters = 64, kernel_size = c(3,3), activation = 'relu') %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_dropout(rate = 0.25) %>% 
  layer_flatten() %>% 
  layer_dense(units = 128, activation = 'relu') %>% 
  layer_dropout(rate = 0.5) %>% 
  layer_dense(units = num_classes, activation = 'softmax')

Compile model:

# Compile model
model %>% compile(
  loss = loss_categorical_crossentropy,
  optimizer = optimizer_adadelta(),
  metrics = c('accuracy')
)
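
Under TensorFlow 2.x, optimizer_adadelta() defaults to a small learning rate (0.001), so training can be slower than with the classic Keras default of 1.0. If convergence stalls, the rate can be set explicitly; this is an optional tweak, not part of the original example, and the learning_rate argument name assumes keras >= 2.6:

# Optional (assumed argument name): a larger Adadelta learning rate
# optimizer = optimizer_adadelta(learning_rate = 1.0)
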
summary(model)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## conv2d_1 (Conv2D)                   (None, 26, 26, 32)              320         
## ________________________________________________________________________________
## conv2d (Conv2D)                     (None, 24, 24, 64)              18496       
## ________________________________________________________________________________
## max_pooling2d (MaxPooling2D)        (None, 12, 12, 64)              0           
## ________________________________________________________________________________
## dropout_1 (Dropout)                 (None, 12, 12, 64)              0           
## ________________________________________________________________________________
## flatten (Flatten)                   (None, 9216)                    0           
## ________________________________________________________________________________
## dense_1 (Dense)                     (None, 128)                     1179776     
## ________________________________________________________________________________
## dropout (Dropout)                   (None, 128)                     0           
## ________________________________________________________________________________
## dense (Dense)                       (None, 10)                      1290        
## ================================================================================
## Total params: 1,199,882
## Trainable params: 1,199,882
## Non-trainable params: 0
## ________________________________________________________________________________
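
The parameter counts above can be checked by hand: a convolutional layer has (kernel height x kernel width x input channels + 1) x filters weights (the +1 is the bias), and a dense layer has (inputs + 1) x units. Each 3 x 3 valid convolution also shrinks the spatial size by 2 (28 -> 26 -> 24), and 2 x 2 max pooling halves it (24 -> 12):

(3 * 3 * 1 + 1) * 32     # first conv layer:       320
(3 * 3 * 32 + 1) * 64    # second conv layer:    18496
12 * 12 * 64             # flattened units:       9216
(9216 + 1) * 128         # hidden dense layer: 1179776
(128 + 1) * 10           # output layer:          1290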

Train and evaluate (on local CPU)

system.time({
  history <- model %>% fit(
    x_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    verbose = 1,
    validation_data = list(x_test, y_test)
  )
})
##     user   system  elapsed 
## 3027.755   33.487  885.293
plot(history)
## `geom_smooth()` using formula 'y ~ x'
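
The per-epoch metrics behind the plot can also be inspected directly; the keras package provides an as.data.frame() method for the history object:

# Loss and accuracy recorded at each epoch, in long format
head(as.data.frame(history))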

Testing:

scores <- model %>% 
  evaluate(x_test, y_test, verbose = 0)

# Output metrics
cat('Test loss:', scores[[1]], '\n')
## Test loss: 0.02808629
cat('Test accuracy:', scores[[2]], '\n')
## Test accuracy: 0.992
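
Beyond the aggregate metrics, per-image predictions can be obtained with predict(), which returns a 10-column probability matrix. A minimal sketch (not part of the original example):

# Predicted digit = index of the most probable class (columns 1..10 are digits 0..9)
probs <- model %>% predict(x_test)
pred <- apply(probs, 1, which.max) - 1
mean(pred == mnist$test$y)  # agrees with the reported test accuracy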

Train and evaluate (using GPU)

On a machine with a single NVIDIA GeForce RTX 2080 Ti GPU (11 GB GDDR6, 4352 CUDA cores), the training time is reduced to 51 seconds.
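
To verify that TensorFlow actually sees the GPU before training, the device list can be queried through the tensorflow R package (assuming a CUDA-enabled TensorFlow build is installed):

# An empty list here means training will fall back to the CPU
library(tensorflow)
tf$config$list_physical_devices("GPU")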