1 Loading packages

## here() starts at /Users/kevinwang/Dropbox (Sydney Uni)/tidytuesday

2 Loading functions

theme_set(theme_classic(18) +
            theme(legend.position = 

3 Loading data

# Read the first sheet of the tuition workbook; the path is resolved by
# here() relative to the project root.
raw_data <- readxl::read_excel(
  here("2018-04-02", "us_avg_tuition.xlsx"),
  sheet = 1
)

4 Data cleaning

long_data = raw_data %>% 
  tidyr::gather(key = year, 
                value = cost, 

5 Data summaries

long_data %>% 
  dplyr::filter(year == "2004-05") %>% 
6 Data visualisation

6.1 Time line

# Tuition cost by year, drawn as one path per state. geom_path() connects
# the points in the order they appear in long_data. The legend is
# suppressed for this plot.
ggplot(
  long_data,
  aes(x = year, y = cost, group = State, colour = State)
) +
  geom_path() +
  theme(legend.position = "none")

7 Add in US regions data

# Region lookup table (Region, State).
region_data <- here("2018-04-02", "US_state_regions.csv") %>%
  # Column names are supplied explicitly, so the first row of the file is
  # read as data rather than as a header.
  read_csv(col_names = c("Region", "State")) %>%
  # Missing Region cells take the closest non-missing value above them.
  tidyr::fill(Region, .direction = "down") %>%
  # Strip trailing whitespace from the state names (presumably so they
  # match the State labels of the other tables -- confirm).
  dplyr::mutate(State = str_trim(State, side = "right"))
8 Add in the GDP data from Wikipedia

us_states_gdp = read_csv(here("2018-04-02", "us_states_GDP_PerCap.csv")) %>% 
  dplyr::select(-Rank) %>% 
  tidyr::gather(key = year, 
                value = gdp,
8.1 Checking state labels

    tuition_data = long_data$State %>% unique,
    gdp_data = us_states_gdp$State %>% unique, 
    region_data = region_data$State %>% unique

8.2 Merging data

long_data_merge = long_data %>% 
    year_range = year, 
    year = stringr::str_sub(year, 1L, 4L))

# Attach GDP and region information to the tuition data. Both joins are
# left joins, so every row of long_data_merge is kept; rows without a
# match get NA in the added columns.
merge_data <- long_data_merge %>%
  # GDP is matched on both State and year.
  dplyr::left_join(us_states_gdp, by = c("State", "year")) %>%
  # Region is matched on State only.
  dplyr::left_join(region_data, by = "State")

8.3 Visualisation of GDP and cost

complete_merge_data = merge_data %>% 
  dplyr::filter(complete.cases(gdp)) %>% 
  dplyr::group_by(year) %>% 
    rank_gdp = rank(-gdp), 
    rank_cost = rank(-cost))

plot_merge_2011 = complete_merge_data %>% 
  dplyr::filter(year == 2011) %>% 
  ggplot(aes(x = gdp, y = cost, colour = Region)) +
  ggiraph::geom_point_interactive(aes(tooltip = State, onclick = rank_gdp, data_id = State)) +

plot_merge_2015 = complete_merge_data %>% 
  dplyr::filter(year == 2015) %>% 
  ggplot(aes(x = gdp, y = cost, colour = Region)) +
  ggiraph::geom_point_interactive(aes(tooltip = State, onclick = rank_gdp, data_id = State)) +

# Show the 2011 and 2015 scatter plots in one grid, each with
# confidence ellipses (level = 0.8) coloured by Region added on top.
cowplot::plot_grid(
  plotlist = lapply(
    list(plot_merge_2011, plot_merge_2015),
    function(p) {
      p + ggpubr::stat_conf_ellipse(aes(color = Region), level = 0.8)
    }
  )
)


8.4 Interactive plot

# Render the combined 2011 and 2015 plots as an interactive widget.
# NOTE(review): `+` between two ggplot objects is not provided by ggplot2
# itself; presumably patchwork is attached earlier in the file -- confirm.
girafe(
  code = print(plot_merge_2011 + plot_merge_2015),
  width_svg = 8,
  height_svg = 4
)

9 Session Info

