library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(here)
## here() starts at /Users/kevinwang/Dropbox (Sydney Uni)/tidytuesday
library(readxl)
library(patchwork)
library(ggiraph)
# Session-wide plot defaults: classic theme at base size 18 with the legend
# placed underneath the panel.
theme_set(
  theme_classic(base_size = 18) + theme(legend.position = "bottom")
)
# Read the first sheet of the tuition workbook.
raw_data <- readxl::read_excel(
  here("2018-04-02", "us_avg_tuition.xlsx"),
  sheet = 1
)

# Reshape to long form: every column except State is stacked into a
# `year` (former column name) / `cost` (cell value) pair.
long_data <- tidyr::gather(raw_data, key = "year", value = "cost", -State)

long_data
## # A tibble: 600 x 3
## State year cost
## <chr> <chr> <dbl>
## 1 Alabama 2004-05 5683.
## 2 Alaska 2004-05 4328.
## 3 Arizona 2004-05 5138.
## 4 Arkansas 2004-05 5772.
## 5 California 2004-05 5286.
## 6 Colorado 2004-05 4704.
## 7 Connecticut 2004-05 7984.
## 8 Delaware 2004-05 8353.
## 9 Florida 2004-05 3848.
## 10 Georgia 2004-05 4298.
## # … with 590 more rows
# Rows for the "2004-05" year only, sorted by ascending cost.
dplyr::arrange(
  dplyr::filter(long_data, year == "2004-05"),
  cost
)
## # A tibble: 50 x 3
## State year cost
## <chr> <chr> <dbl>
## 1 Nevada 2004-05 3621.
## 2 Florida 2004-05 3848.
## 3 Wyoming 2004-05 4086.
## 4 Utah 2004-05 4125.
## 5 Hawaii 2004-05 4267.
## 6 Georgia 2004-05 4298.
## 7 Alaska 2004-05 4328.
## 8 Louisiana 2004-05 4453.
## 9 Oklahoma 2004-05 4454.
## 10 North Carolina 2004-05 4493.
## # … with 40 more rows
# One path per state across the year columns; the legend is hidden for this
# plot only, overriding the session default.
ggplot(
  long_data,
  aes(x = year, y = cost, group = State, colour = State)
) +
  geom_path() +
  theme(legend.position = "none")
# Region lookup table. Column names are supplied explicitly, so the first line
# of the CSV is read as data. fill() carries the last non-missing Region down
# into the rows below it, and trailing whitespace is stripped from State.
region_data <- here("2018-04-02", "US_state_regions.csv") %>%
  read_csv(col_names = c("Region", "State")) %>%
  tidyr::fill(Region, .direction = "down") %>%
  dplyr::mutate(State = stringr::str_trim(State, side = "right"))
## Parsed with column specification:
## cols(
## Region = col_character(),
## State = col_character()
## )
region_data
## # A tibble: 51 x 2
## Region State
## <chr> <chr>
## 1 New England Connecticut
## 2 New England Maine
## 3 New England Massachusetts
## 4 New England New Hampshire
## 5 New England Rhode Island
## 6 New England Vermont
## 7 Mideast Delaware
## 8 Mideast District of Columbia
## 9 Mideast Maryland
## 10 Mideast New Jersey
## # … with 41 more rows
# GDP table in long form: drop the Rank column, then stack every remaining
# column except State into `year` (former column name) / `gdp` (cell value).
us_states_gdp <- here("2018-04-02", "us_states_GDP_PerCap.csv") %>%
  read_csv() %>%
  dplyr::select(-Rank) %>%
  tidyr::gather(key = "year", value = "gdp", -State)
## Parsed with column specification:
## cols(
## Rank = col_character(),
## State = col_character(),
## `2018` = col_number(),
## `2017` = col_number(),
## `2016` = col_number(),
## `2015` = col_number(),
## `2014` = col_number(),
## `2013` = col_number(),
## `2012` = col_number(),
## `2011` = col_number()
## )
us_states_gdp
## # A tibble: 416 x 3
## State year gdp
## <chr> <chr> <dbl>
## 1 District of Columbia 2018 160472
## 2 Massachusetts 2018 65545
## 3 New York 2018 64579
## 4 Connecticut 2018 64511
## 5 Alaska 2018 63971
## 6 Delaware 2018 63664
## 7 North Dakota 2018 62837
## 8 Wyoming 2018 58821
## 9 New Jersey 2018 57084
## 10 Washington 2018 56831
## # … with 406 more rows
# Venn diagram of the distinct State values present in each of the three
# tables, to see how well the join keys overlap.
gplots::venn(list(
  tuition_data = unique(long_data$State),
  gdp_data = unique(us_states_gdp$State),
  region_data = unique(region_data$State)
))
# Keep the original year label in `year_range`, then cut `year` down to its
# first four characters so it can be used as a join key.
long_data_merge <- dplyr::mutate(
  long_data,
  year_range = year,
  year = stringr::str_sub(year, 1L, 4L)
)

# Attach GDP (matched on State and year) and Region (matched on State).
# Left joins keep every tuition row even when there is no match.
merge_data <- long_data_merge %>%
  dplyr::left_join(us_states_gdp, by = c("State", "year")) %>%
  dplyr::left_join(region_data, by = "State")
# Rows that have a GDP value, with within-year ranks added.
# rank(-x) gives rank 1 to the largest value; ties use rank()'s default
# handling. The grouping is needed only for the ranking, so it is dropped
# again afterwards to avoid leaking into later verbs on this table.
complete_merge_data <- merge_data %>%
  dplyr::filter(!is.na(gdp)) %>%
  dplyr::group_by(year) %>%
  dplyr::mutate(
    rank_gdp = rank(-gdp),
    rank_cost = rank(-cost)
  ) %>%
  dplyr::ungroup()
# Build the interactive GDP-vs-cost scatter for a single year.
#
# data:        table with columns year, gdp, cost, Region, State, rank_gdp.
# target_year: year to keep. It is converted to character before comparison
#              because `year` is built with str_sub() earlier in this script
#              and is therefore a character column.
#
# Returns a ggplot object with points coloured by Region.
plot_gdp_vs_cost <- function(data, target_year) {
  data %>%
    dplyr::filter(year == as.character(target_year)) %>%
    ggplot(aes(x = gdp, y = cost, colour = Region)) +
    # NOTE(review): ggiraph documents `onclick` as JavaScript to run when the
    # element is clicked; rank_gdp is a numeric rank, so the click presumably
    # does nothing useful. Mapping kept as-is; confirm the intent.
    ggiraph::geom_point_interactive(
      aes(tooltip = State, onclick = rank_gdp, data_id = State)
    ) +
    ggsci::scale_color_lancet()
}

plot_merge_2011 <- plot_gdp_vs_cost(complete_merge_data, "2011")
plot_merge_2015 <- plot_gdp_vs_cost(complete_merge_data, "2015")

# Side-by-side static view, each panel with a per-Region confidence ellipse
# (level = 0.8) drawn over the points.
cowplot::plot_grid(
  plot_merge_2011 +
    ggpubr::stat_conf_ellipse(aes(color = Region), level = 0.8),
  plot_merge_2015 +
    ggpubr::stat_conf_ellipse(aes(color = Region), level = 0.8)
)
# ggsci colour palette reference:
# https://cran.r-project.org/web/packages/ggsci/vignettes/ggsci.html
# Render the two yearly scatters together (combined with `+` on the plot
# objects) as one interactive SVG widget, 8 x 4 in size.
girafe(
  code = print(plot_merge_2011 + plot_merge_2015),
  width_svg = 8,
  height_svg = 4
)

# Record the R and package versions used for this run.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gdtools_0.1.8 ggiraph_0.6.1 patchwork_0.0.1 readxl_1.3.1
## [5] here_0.1 forcats_0.4.0 stringr_1.4.0 dplyr_0.8.1
## [9] purrr_0.3.2 readr_1.3.1 tidyr_0.8.3 tibble_2.1.1
## [13] ggplot2_3.1.1 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 lubridate_1.7.4 lattice_0.20-38
## [4] gtools_3.8.1 assertthat_0.2.1 zeallot_0.1.0
## [7] rprojroot_1.3-2 digest_0.6.19 utf8_1.1.4
## [10] R6_2.4.0 cellranger_1.1.0 plyr_1.8.4
## [13] backports_1.1.4 evaluate_0.14 httr_1.4.0
## [16] pillar_1.4.0 gplots_3.0.1.1 rlang_0.3.4
## [19] lazyeval_0.2.2 rstudioapi_0.10 gdata_2.18.0
## [22] rmarkdown_1.13 labeling_0.3 htmlwidgets_1.3
## [25] munsell_0.5.0 broom_0.5.2 compiler_3.6.0
## [28] modelr_0.1.4 xfun_0.7 pkgconfig_2.0.2
## [31] htmltools_0.3.6 tidyselect_0.2.5 fansi_0.4.0
## [34] crayon_1.3.4 withr_2.1.2 ggpubr_0.2
## [37] bitops_1.0-6 grid_3.6.0 nlme_3.1-140
## [40] jsonlite_1.6 gtable_0.3.0 magrittr_1.5
## [43] scales_1.0.0 KernSmooth_2.23-15 cli_1.1.0
## [46] stringi_1.4.3 xml2_1.2.0 generics_0.0.2
## [49] vctrs_0.1.0 cowplot_0.9.4 ggsci_2.9
## [52] tools_3.6.0 glue_1.3.1 hms_0.4.2
## [55] yaml_2.2.0 colorspace_1.4-1 caTools_1.17.1.2
## [58] rvest_0.3.4 knitr_1.23 haven_2.1.0