Display machine information:

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.29   R6_2.5.1        jsonlite_1.7.2  magrittr_2.0.1 
##  [5] evaluate_0.14   stringi_1.7.6   rlang_1.0.1     cli_3.1.0      
##  [9] rstudioapi_0.13 jquerylib_0.1.4 bslib_0.3.1     rmarkdown_2.11 
## [13] tools_3.6.0     stringr_1.4.0   xfun_0.29       yaml_2.2.1     
## [17] fastmap_1.1.0   compiler_3.6.0  htmltools_0.5.2 knitr_1.37     
## [21] sass_0.4.0

Load database libraries and the tidyverse frontend:

suppressPackageStartupMessages(library(dbplyr))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(lubridate))
library(DBI)
library(bigrquery)

Q1. Compile the ICU cohort in HW2 from the Google BigQuery database

Below is an outline of steps.

  1. Load the service account token.
# path to the service account token 
if (Sys.info()[["nodename"]] == "biostat-203b-teaching-server") {
  # on teaching server
  satoken <- "/mnt/mimiciv/1.0/biostat-203b-2022winter-3fdc2392ac39.json"
} else {
  # on my own Mac laptop
  satoken <- "/Users/huazhou/Documents/github.com/ucla-biostat-203b/2022winter/hw/hw3/biostat-203b-2022winter-3fdc2392ac39.json"
}
# BigQuery authentication using service account
bq_auth(
  path = satoken,
  # email = "mimiciv-bigquery@biostat-203b-2022winter.iam.gserviceaccount.com",
  # scopes = c("https://www.googleapis.com/auth/bigquery",
  #   "https://www.googleapis.com/auth/cloud-platform")
)
  1. Connect to BigQuery database mimic4_v1_0_203b in GCP (Google Cloud Platform), using the billing account biostat-203b-2022winter.
# Connect to the BigQuery database `biostat-203b-2022winter.mimic4_v1_0_203b`
con <- dbConnect(
    bigrquery::bigquery(),
    project = "biostat-203b-2022winter",
    dataset = "mimic4_v1_0_203b",
    billing = "biostat-203b-2022winter"
)
con
## <BigQueryConnection>
##   Dataset: biostat-203b-2022winter.mimic4_v1_0_203b
##   Billing: biostat-203b-2022winter
  1. List all tables in the mimic4_v1_0_203b database.
dbListTables(con)
## [1] "admissions"  "chartevents" "d_items"     "d_labitems"  "icustays"   
## [6] "labevents"   "patients"
  1. Connect to the icustays table.
# full ICU stays table
icustays_tble <- tbl(con, "icustays") %>%
  show_query() %>%
  print(width = Inf)
## <SQL>
## SELECT *
## FROM `icustays`
## # Source:   table<icustays> [?? x 8]
## # Database: BigQueryConnection
##    subject_id  hadm_id  stay_id first_careunit last_careunit 
##         <int>    <int>    <int> <chr>          <chr>         
##  1   12776735 20817525 34547665 Neuro Stepdown Neuro Stepdown
##  2   16256226 20013290 39289362 Neuro Stepdown Neuro Stepdown
##  3   12974563 29618057 32563675 Neuro Stepdown Neuro Stepdown
##  4   14609218 20606189 34947848 Neuro Stepdown Neuro Stepdown
##  5   12687112 26132667 37445058 Neuro Stepdown Neuro Stepdown
##  6   18190935 24712081 30056748 Neuro Stepdown Neuro Stepdown
##  7   10404210 22880512 30254828 Neuro Stepdown Neuro Stepdown
##  8   12552973 21320643 30261549 Neuro Stepdown Neuro Stepdown
##  9   14303051 26559537 30401486 Neuro Stepdown Neuro Stepdown
## 10   12347959 22779073 30503984 Neuro Stepdown Neuro Stepdown
##    intime              outtime               los
##    <dttm>              <dttm>              <dbl>
##  1 2200-07-12 00:33:00 2200-07-13 16:44:40 1.67 
##  2 2150-12-20 16:09:08 2150-12-21 14:58:40 0.951
##  3 2138-11-13 23:30:01 2138-11-15 16:25:19 1.71 
##  4 2174-06-28 21:13:00 2174-07-05 17:01:32 6.83 
##  5 2162-05-31 18:08:45 2162-06-04 10:16:13 3.67 
##  6 2115-12-08 09:39:45 2115-12-12 02:36:52 3.71 
##  7 2161-03-03 18:30:00 2161-03-05 18:10:31 1.99 
##  8 2135-09-21 18:12:03 2135-09-23 12:56:46 1.78 
##  9 2175-11-20 01:02:00 2175-11-21 16:06:17 1.63 
## 10 2140-07-11 17:54:14 2140-07-19 13:01:05 7.80 
## # … with more rows

We only keep the first ICU stay. Following code is kind of a hack, using the summarise_all(min) function. It seems that slice_min(), slice_head(), distinct(, .keep_all = TRUE) don’t work with dbplyr+bigrquery at the moment.

icustays_tble <- icustays_tble %>% 
  select(subject_id, intime) %>% 
  group_by(subject_id) %>% 
  summarise_all(min) %>%
  left_join(icustays_tble, by = c("subject_id", "intime")) %>%
  show_query() %>%
  print(width = Inf)
## Warning: Missing values are always removed in SQL.
## Use `MIN(x, na.rm = TRUE)` to silence this warning
## This warning is displayed only once per session.
## <SQL>
## SELECT `LHS`.`subject_id` AS `subject_id`, `LHS`.`intime` AS `intime`, `hadm_id`, `stay_id`, `first_careunit`, `last_careunit`, `outtime`, `los`
## FROM (SELECT `subject_id`, MIN(`intime`) AS `intime`
## FROM (SELECT `subject_id`, `intime`
## FROM `icustays`) `q01`
## GROUP BY `subject_id`) `LHS`
## LEFT JOIN `icustays` AS `RHS`
## ON (`LHS`.`subject_id` = `RHS`.`subject_id` AND `LHS`.`intime` = `RHS`.`intime`)
## 
## # Source:   lazy query [?? x 8]
## # Database: BigQueryConnection
##    subject_id intime               hadm_id  stay_id first_careunit
##         <int> <dttm>                 <int>    <int> <chr>         
##  1   12776735 2200-07-12 00:33:00 20817525 34547665 Neuro Stepdown
##  2   16256226 2150-12-20 16:09:08 20013290 39289362 Neuro Stepdown
##  3   12974563 2138-11-13 23:30:01 29618057 32563675 Neuro Stepdown
##  4   14609218 2174-06-28 21:13:00 20606189 34947848 Neuro Stepdown
##  5   12687112 2162-05-31 18:08:45 26132667 37445058 Neuro Stepdown
##  6   18190935 2115-11-12 23:23:32 21137829 34338402 Neuro Stepdown
##  7   10404210 2161-03-03 18:30:00 22880512 30254828 Neuro Stepdown
##  8   12552973 2135-09-21 18:12:03 21320643 30261549 Neuro Stepdown
##  9   14303051 2175-11-20 01:02:00 26559537 30401486 Neuro Stepdown
## 10   12347959 2140-07-11 17:54:14 22779073 30503984 Neuro Stepdown
##    last_careunit  outtime               los
##    <chr>          <dttm>              <dbl>
##  1 Neuro Stepdown 2200-07-13 16:44:40 1.67 
##  2 Neuro Stepdown 2150-12-21 14:58:40 0.951
##  3 Neuro Stepdown 2138-11-15 16:25:19 1.71 
##  4 Neuro Stepdown 2174-07-05 17:01:32 6.83 
##  5 Neuro Stepdown 2162-06-04 10:16:13 3.67 
##  6 Neuro Stepdown 2115-11-15 18:06:26 2.78 
##  7 Neuro Stepdown 2161-03-05 18:10:31 1.99 
##  8 Neuro Stepdown 2135-09-23 12:56:46 1.78 
##  9 Neuro Stepdown 2175-11-21 16:06:17 1.63 
## 10 Neuro Stepdown 2140-07-19 13:01:05 7.80 
## # … with more rows
  1. Connect to the admissions table and only keep the patients who have a match in icustays_tble (according to subject_id and hadm_id).
# # TODO
# admissions_tble <- 
  1. Connect to the patients table and only keep the patients who have a match in icustays_tble (according to subject_id).
# # TODO
# patients_tble <-
  1. Connect to the labevents table and retrieve a subset that only contain subjects who appear in icustays_tble and the lab items listed in HW2.
# # TODO
# labevents_tble <- 

Only keep the first lab measurements during ICU stay and pivot lab items to become variables/columns.

# # TODO
# labevents_tble <- labevents_tble %>% ...
  1. Connect to chartevents table and retrieve a subset that only contain subjects who appear in icustays_tble and the chart events listed in HW2.
# # TODO
# chartevents_tble <- 

Only keep the first chart events during ICU stay and pivot chart events to become variables/columns.

# # TODO
# chartevents_tble <- chartevents_tble %>% ...
  1. Put things together. This step is similar to Q7 of HW2. Using one chain of pipes %>% to perform following data wrangling steps: (i) start with the icustays_tble for the first ICU stay of each unique patient, (ii) merge in admissions and patients tables, (iii) keep adults only (age at admission >= 18), (iv) merge in the labevents and chartevents tables, (v) create an indicator for 30-day mortality, (vi) save the final tibble to an icu_cohort.rds R data file in the mimiciv_shiny folder.
# make a directory mimiciv_shiny
if (!dir.exists("mimiciv_shiny")) {
  dir.create("mimiciv_shiny")
}
# # TODO
# icu_cohort <- icustays_tble %>% ...

Close database connection and clear workspace.

dbDisconnect(con)
rm(list = ls())

Q2. Shiny app

Develop a Shiny app for exploring the ICU cohort data created in Q1. The app should reside in the mimiciv_shiny folder. The app should provide easy access to the graphical and numerical summaries of variables (demographics, lab measurements, vitals) in the ICU cohort.