1 Loading packages

library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.1     ✔ dplyr   0.8.1
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(here)
## here() starts at /Users/kevinwang/Dropbox (Sydney Uni)/tidytuesday
library(readxl)
library(patchwork)
library(ggiraph)

2 Loading functions

# Make the classic ggplot2 theme (base font size 18) the default for every
# plot that follows, with legends drawn at the bottom.
theme_set(
  theme_classic(base_size = 18) +
    theme(legend.position = "bottom")
)

3 Loading data

# Read the first sheet of the tuition workbook; here() builds the path from
# the project root.
raw_data = readxl::read_excel(
  path = here("2018-04-02", "us_avg_tuition.xlsx"),
  sheet = 1
)

4 Data cleaning

# Reshape from wide to long: every column except State is stacked into a
# `year` (former column name) / `cost` (cell value) pair.
long_data = tidyr::gather(raw_data, key = year, value = cost, -State)

long_data
## # A tibble: 600 x 3
##    State       year     cost
##    <chr>       <chr>   <dbl>
##  1 Alabama     2004-05 5683.
##  2 Alaska      2004-05 4328.
##  3 Arizona     2004-05 5138.
##  4 Arkansas    2004-05 5772.
##  5 California  2004-05 5286.
##  6 Colorado    2004-05 4704.
##  7 Connecticut 2004-05 7984.
##  8 Delaware    2004-05 8353.
##  9 Florida     2004-05 3848.
## 10 Georgia     2004-05 4298.
## # … with 590 more rows

5 Data summaries

# Rows for the 2004-05 academic year, sorted by ascending cost.
dplyr::arrange(
  dplyr::filter(long_data, year == "2004-05"),
  cost
)
## # A tibble: 50 x 3
##    State          year     cost
##    <chr>          <chr>   <dbl>
##  1 Nevada         2004-05 3621.
##  2 Florida        2004-05 3848.
##  3 Wyoming        2004-05 4086.
##  4 Utah           2004-05 4125.
##  5 Hawaii         2004-05 4267.
##  6 Georgia        2004-05 4298.
##  7 Alaska         2004-05 4328.
##  8 Louisiana      2004-05 4453.
##  9 Oklahoma       2004-05 4454.
## 10 North Carolina 2004-05 4493.
## # … with 40 more rows

6 Data visualisation

6.1 Time line

# One path per state tracing cost across the year labels. The legend is
# switched off for this plot only.
ggplot(
  data = long_data,
  mapping = aes(x = year, y = cost, group = State, colour = State)
) +
  geom_path() +
  theme(legend.position = "none")

7 Add in US regions data

# Column names are supplied here, so the first line of the file is read as
# data. Missing Region cells are filled downwards from the previous
# non-missing value, and trailing whitespace is trimmed from State.
region_data = readr::read_csv(
  file = here("2018-04-02", "US_state_regions.csv"),
  col_names = c("Region", "State")
) %>%
  tidyr::fill(Region, .direction = "down") %>%
  dplyr::mutate(State = stringr::str_trim(State, side = "right"))
## Parsed with column specification:
## cols(
##   Region = col_character(),
##   State = col_character()
## )
# Print the region lookup table for inspection.
region_data
## # A tibble: 51 x 2
##    Region       State               
##    <chr>        <chr>               
##  1 New England  Connecticut         
##  2 New England  Maine               
##  3 New England  Massachusetts       
##  4 New England  New Hampshire       
##  5 New England  Rhode Island        
##  6 New England  Vermont             
##  7 Mideast      Delaware            
##  8 Mideast      District of Columbia
##  9 Mideast      Maryland            
## 10 Mideast      New Jersey          
## # … with 41 more rows

8 Add in the GDP data from Wikipedia

# Read the GDP file, discard its Rank column, and stack the remaining
# columns (all but State) into long `year` / `gdp` pairs.
us_states_gdp = tidyr::gather(
  dplyr::select(
    readr::read_csv(here("2018-04-02", "us_states_GDP_PerCap.csv")),
    -Rank
  ),
  key = year,
  value = gdp,
  -State
)
## Parsed with column specification:
## cols(
##   Rank = col_character(),
##   State = col_character(),
##   `2018` = col_number(),
##   `2017` = col_number(),
##   `2016` = col_number(),
##   `2015` = col_number(),
##   `2014` = col_number(),
##   `2013` = col_number(),
##   `2012` = col_number(),
##   `2011` = col_number()
## )
# Print the long-format GDP table for inspection.
us_states_gdp
## # A tibble: 416 x 3
##    State                year     gdp
##    <chr>                <chr>  <dbl>
##  1 District of Columbia 2018  160472
##  2 Massachusetts        2018   65545
##  3 New York             2018   64579
##  4 Connecticut          2018   64511
##  5 Alaska               2018   63971
##  6 Delaware             2018   63664
##  7 North Dakota         2018   62837
##  8 Wyoming              2018   58821
##  9 New Jersey           2018   57084
## 10 Washington           2018   56831
## # … with 406 more rows

8.1 Checking state labels

# Venn diagram of the distinct State values found in each of the three
# tables, to see how the labels overlap before joining.
gplots::venn(
  list(
    tuition_data = unique(long_data$State),
    gdp_data = unique(us_states_gdp$State),
    region_data = unique(region_data$State)
  )
)

8.2 Merging data

# Keep the original year label in `year_range`, then cut `year` down to its
# first four characters so it can be used as a join key.
long_data_merge = dplyr::mutate(
  long_data,
  year_range = year,
  year = stringr::str_sub(year, start = 1L, end = 4L)
)

# Left joins keep every tuition row: first attach GDP by State and year,
# then attach Region by State.
merge_data = dplyr::left_join(
  dplyr::left_join(long_data_merge, us_states_gdp, by = c("State", "year")),
  region_data,
  by = "State"
)

8.3 Visualisation of GDP and cost

# Keep only rows that have a GDP value, then rank rows within each year.
# Values are negated before ranking, so rank 1 is the largest gdp / cost.
complete_merge_data = merge_data %>%
  dplyr::filter(!is.na(gdp)) %>%
  dplyr::group_by(year) %>%
  dplyr::mutate(
    rank_gdp = rank(-gdp),
    rank_cost = rank(-cost)
  ) %>%
  # Drop the grouping so later verbs on this table do not silently run
  # per year.
  dplyr::ungroup()

# Build the interactive GDP-versus-cost scatter plot for a single year.
#
# data:      table with year, gdp, cost, Region, State and rank_gdp columns.
# plot_year: year to keep; converted to text before comparison because
#            `year` is produced by str_sub() and is therefore character.
# Returns a ggplot object with interactive points coloured by Region.
plot_gdp_vs_cost = function(data, plot_year) {
  data %>%
    dplyr::filter(year == as.character(plot_year)) %>%
    ggplot(aes(x = gdp, y = cost, colour = Region)) +
    ggiraph::geom_point_interactive(
      # NOTE(review): `onclick` is mapped to the numeric rank_gdp column;
      # confirm this is intended, since ggiraph documents onclick as
      # JavaScript to run on click.
      aes(tooltip = State, onclick = rank_gdp, data_id = State)
    ) +
    ggsci::scale_color_lancet()
}

plot_merge_2011 = plot_gdp_vs_cost(complete_merge_data, 2011)

plot_merge_2015 = plot_gdp_vs_cost(complete_merge_data, 2015)


# Add a confidence ellipse (level = 0.8) coloured by Region to each yearly
# scatter plot, then arrange the two plots in one grid.
cowplot::plot_grid(
  plotlist = lapply(
    list(plot_merge_2011, plot_merge_2015),
    function(p) p + ggpubr::stat_conf_ellipse(aes(color = Region), level = 0.8)
  )
)

Colour palette reference (ggsci vignette): https://cran.r-project.org/web/packages/ggsci/vignettes/ggsci.html

8.4 Interactive plot

# Render both yearly scatter plots as a single interactive widget. `+` on
# two ggplot objects combines them (patchwork is attached above).
ggiraph::girafe(
  code = print(plot_merge_2011 + plot_merge_2015),
  width_svg = 8,
  height_svg = 4
)

9 Session Info

# Record the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gdtools_0.1.8   ggiraph_0.6.1   patchwork_0.0.1 readxl_1.3.1   
##  [5] here_0.1        forcats_0.4.0   stringr_1.4.0   dplyr_0.8.1    
##  [9] purrr_0.3.2     readr_1.3.1     tidyr_0.8.3     tibble_2.1.1   
## [13] ggplot2_3.1.1   tidyverse_1.2.1
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1         lubridate_1.7.4    lattice_0.20-38   
##  [4] gtools_3.8.1       assertthat_0.2.1   zeallot_0.1.0     
##  [7] rprojroot_1.3-2    digest_0.6.19      utf8_1.1.4        
## [10] R6_2.4.0           cellranger_1.1.0   plyr_1.8.4        
## [13] backports_1.1.4    evaluate_0.14      httr_1.4.0        
## [16] pillar_1.4.0       gplots_3.0.1.1     rlang_0.3.4       
## [19] lazyeval_0.2.2     rstudioapi_0.10    gdata_2.18.0      
## [22] rmarkdown_1.13     labeling_0.3       htmlwidgets_1.3   
## [25] munsell_0.5.0      broom_0.5.2        compiler_3.6.0    
## [28] modelr_0.1.4       xfun_0.7           pkgconfig_2.0.2   
## [31] htmltools_0.3.6    tidyselect_0.2.5   fansi_0.4.0       
## [34] crayon_1.3.4       withr_2.1.2        ggpubr_0.2        
## [37] bitops_1.0-6       grid_3.6.0         nlme_3.1-140      
## [40] jsonlite_1.6       gtable_0.3.0       magrittr_1.5      
## [43] scales_1.0.0       KernSmooth_2.23-15 cli_1.1.0         
## [46] stringi_1.4.3      xml2_1.2.0         generics_0.0.2    
## [49] vctrs_0.1.0        cowplot_0.9.4      ggsci_2.9         
## [52] tools_3.6.0        glue_1.3.1         hms_0.4.2         
## [55] yaml_2.2.0         colorspace_1.4-1   caTools_1.17.1.2  
## [58] rvest_0.3.4        knitr_1.23         haven_2.1.0