sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.29 R6_2.5.1 jsonlite_1.7.2 magrittr_2.0.1
## [5] evaluate_0.14 stringi_1.7.6 rlang_1.0.1 cli_3.1.0
## [9] rstudioapi_0.13 jquerylib_0.1.4 bslib_0.3.1 rmarkdown_2.11
## [13] tools_3.6.0 stringr_1.4.0 xfun_0.29 yaml_2.2.1
## [17] fastmap_1.1.0 compiler_3.6.0 htmltools_0.5.2 knitr_1.37
## [21] sass_0.4.0
Source: https://tensorflow.rstudio.com/keras/articles/examples/mnist_cnn.html
In this example, we train a convolutional neural networks (CNN) on the MNIST data set. Achieve testing accuracy 99.16% after 12 epochs.
The MNIST database (Modified National Institute of Standards and Technology database) is a large database of handwritten digits (\(28 \times 28\)) that is commonly used for training and testing machine learning algorithms.
60,000 training images, 10,000 testing images.
For CNN, instead of vectorizing the images, we keep the 2D structure.
library(keras)
# Data Preparation -----------------------------------------------------
batch_size <- 128
num_classes <- 10
epochs <- 12
# Input image dimensions
img_rows <- 28
img_cols <- 28
# The data, shuffled and split between train and test sets
mnist <- dataset_mnist()
## Loaded Tensorflow version 2.6.2
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y
# Redefine dimension of train/test inputs
x_train <- array_reshape(x_train, c(nrow(x_train), img_rows, img_cols, 1))
x_test <- array_reshape(x_test, c(nrow(x_test), img_rows, img_cols, 1))
input_shape <- c(img_rows, img_cols, 1)
# Transform RGB values into [0,1] range
x_train <- x_train / 255
x_test <- x_test / 255
cat('x_train_shape:', dim(x_train), '\n')
## x_train_shape: 60000 28 28 1
cat(nrow(x_train), 'train samples\n')
## 60000 train samples
cat(nrow(x_test), 'test samples\n')
## 10000 test samples
# Convert class vectors to binary class matrices
y_train <- to_categorical(y_train, num_classes)
y_test <- to_categorical(y_test, num_classes)
image(t(x_train[1, 28:1, ,]), useRaster=TRUE, axes=FALSE, col=grey(seq(0, 1, length = 256)))
y_train[1, ]
## [1] 0 0 0 0 0 1 0 0 0 0
Define model:
model <- keras_model_sequential()
model %>%
layer_conv_2d(filters = 32, kernel_size = c(3,3), activation = 'relu',
input_shape = input_shape) %>%
layer_conv_2d(filters = 64, kernel_size = c(3,3), activation = 'relu') %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_dropout(rate = 0.25) %>%
layer_flatten() %>%
layer_dense(units = 128, activation = 'relu') %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = num_classes, activation = 'softmax')
Compile model:
# Compile model
model %>% compile(
loss = loss_categorical_crossentropy,
optimizer = optimizer_adadelta(),
metrics = c('accuracy')
)
summary(model)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## conv2d_1 (Conv2D) (None, 26, 26, 32) 320
## ________________________________________________________________________________
## conv2d (Conv2D) (None, 24, 24, 64) 18496
## ________________________________________________________________________________
## max_pooling2d (MaxPooling2D) (None, 12, 12, 64) 0
## ________________________________________________________________________________
## dropout_1 (Dropout) (None, 12, 12, 64) 0
## ________________________________________________________________________________
## flatten (Flatten) (None, 9216) 0
## ________________________________________________________________________________
## dense_1 (Dense) (None, 128) 1179776
## ________________________________________________________________________________
## dropout (Dropout) (None, 128) 0
## ________________________________________________________________________________
## dense (Dense) (None, 10) 1290
## ================================================================================
## Total params: 1,199,882
## Trainable params: 1,199,882
## Non-trainable params: 0
## ________________________________________________________________________________
system.time({
history <- model %>% fit(
x_train, y_train,
batch_size = batch_size,
epochs = epochs,
verbose = 1,
validation_data = list(x_test, y_test)
)
})
## user system elapsed
## 3027.755 33.487 885.293
plot(history)
## `geom_smooth()` using formula 'y ~ x'
Testing:
scores <- model %>%
evaluate(x_test, y_test, verbose = 0)
# Output metrics
cat('Test loss:', scores[[1]], '\n')
## Test loss: 0.02808629
cat('Test accuracy:', scores[[2]], '\n')
## Test accuracy: 0.992
On a machine with a single GPU NVIDIA GeForce RTX 2080 Ti (11GB GDDR6, 4352 cores), the time for training process is reduced to 51 seconds.