1 Summary

These data originate from the TidyTuesday challenge (https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-05-21).
There are three datasets; however, I will focus on only one of them: waste_vs_gdp.
I speculate that the amount of waste is correlated not only with GDP but also with GDP growth (https://twitter.com/search?q=tidytuesday%20plastic&src=typd). Since GDP growth is not provided, I downloaded it from the World Bank.

2 Loading packages

library(tidyverse)

## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang

## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ ggplot2 3.1.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.1     ✔ dplyr   0.8.1
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0

## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(gganimate)
library(skimr)

## 
## Attaching package: 'skimr'

## The following object is masked from 'package:stats':
## 
##     filter

library(ggsci)

# Global plot defaults: the classic ggplot2 theme at base size 18, with the
# legend placed at the bottom of each plot.
theme_set(
  theme_classic(base_size = 18) +
    theme(legend.position = "bottom")
)

3 Loading data

# Coastal population vs. mismanaged plastic waste (TidyTuesday 2019-05-21).
# The raw column headers are passed through janitor::clean_names().
coast_vs_waste <- janitor::clean_names(
  readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/coastal-population-vs-mismanaged-plastic.csv")
)

## Parsed with column specification:
## cols(
##   Entity = col_character(),
##   Code = col_character(),
##   Year = col_double(),
##   `Mismanaged plastic waste (tonnes)` = col_double(),
##   `Coastal population` = col_double(),
##   `Total population (Gapminder)` = col_double()
## )

# Per-capita mismanaged plastic waste vs. GDP per capita
# (TidyTuesday 2019-05-21).
# The raw column headers are passed through janitor::clean_names().
mismanaged_vs_gdp <- janitor::clean_names(
  readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/per-capita-mismanaged-plastic-waste-vs-gdp-per-capita.csv")
)

## Parsed with column specification:
## cols(
##   Entity = col_character(),
##   Code = col_character(),
##   Year = col_double(),
##   `Per capita mismanaged plastic waste (kilograms per person per day)` = col_double(),
##   `GDP per capita, PPP (constant 2011 international $) (Rate)` = col_double(),
##   `Total population (Gapminder)` = col_double()
## )

# Per-capita plastic waste vs. GDP per capita (TidyTuesday 2019-05-21).
# This is the dataset the rest of the analysis builds on; the raw column
# headers are passed through janitor::clean_names().
waste_vs_gdp <- janitor::clean_names(
  readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/per-capita-plastic-waste-vs-gdp-per-capita.csv")
)

## Parsed with column specification:
## cols(
##   Entity = col_character(),
##   Code = col_character(),
##   Year = col_double(),
##   `Per capita plastic waste (kilograms per person per day)` = col_double(),
##   `GDP per capita, PPP (constant 2011 international $) (constant 2011 international $)` = col_double(),
##   `Total population (Gapminder)` = col_double()
## )

4 Cleaning up `waste_vs_gdp`

# Count, per entity, how many rows lack a per-capita plastic waste value,
# and list the entities with the most missing values first.
waste_vs_gdp_missing <- waste_vs_gdp %>%
  dplyr::group_by(entity) %>%
  dplyr::summarise(
    nMissing = sum(is.na(per_capita_plastic_waste_kilograms_per_person_per_day))
  ) %>%
  dplyr::arrange(dplyr::desc(nMissing))



# Keep the identifying columns plus shorter names for the plastic-waste and
# population measures, then drop every row that contains an NA.
waste_vs_gdp_cleaned <- waste_vs_gdp %>%
  dplyr::select(
    entity,
    code,
    year,
    plastic_standard = per_capita_plastic_waste_kilograms_per_person_per_day,
    pop = total_population_gapminder
  ) %>%
  na.omit()

waste_vs_gdp_cleaned

## # A tibble: 175 x 5
##    entity              code   year plastic_standard      pop
##    <chr>               <chr> <dbl>            <dbl>    <dbl>
##  1 Albania             ALB    2010            0.069  3204284
##  2 Algeria             DZA    2010            0.144 35468208
##  3 Angola              AGO    2010            0.062 19081912
##  4 Anguilla            AIA    2010            0.252    15358
##  5 Antigua and Barbuda ATG    2010            0.66     88710
##  6 Argentina           ARG    2010            0.183 40412376
##  7 Aruba               ABW    2010            0.252   107488
##  8 Australia           AUS    2010            0.112 22268384
##  9 Bahamas             BHS    2010            0.39    342877
## 10 Bahrain             BHR    2010            0.132  1261835
## # … with 165 more rows

5 Loading World Bank data

# World Bank GDP-per-capita growth export, read from a local copy of the
# download. The first four lines of the file are skipped before parsing.
gdp_growth_raw <- readr::read_csv(
  file = "API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_10576663/API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_10576663.csv",
  skip = 4
)

## Warning: Missing column names filled in: 'X64' [64]

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Country Name` = col_character(),
##   `Country Code` = col_character(),
##   `Indicator Name` = col_character(),
##   `Indicator Code` = col_character(),
##   `1960` = col_logical(),
##   `2018` = col_logical(),
##   X64 = col_logical()
## )

## See spec(...) for full column specifications.

# Country metadata that ships with the World Bank download. Only two columns
# are kept: `TableName` (renamed to `entity`) and `Region` (renamed to
# `region`).
country_regions <- dplyr::transmute(
  readr::read_csv("API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_10576663/Metadata_Country_API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_10576663.csv"),
  entity = TableName,
  region = Region
)

## Warning: Missing column names filled in: 'X6' [6]

## Parsed with column specification:
## cols(
##   `Country Code` = col_character(),
##   Region = col_character(),
##   IncomeGroup = col_character(),
##   SpecialNotes = col_character(),
##   TableName = col_character(),
##   X6 = col_logical()
## )

# Reshape the wide World Bank table (one column per year, 1960-2018) into long
# form with one row per entity and year, then attach each entity's region by
# matching on the entity name.
gdp_growth_cleaned <- gdp_growth_raw %>%
  dplyr::select(entity = `Country Name`, `1960`:`2018`) %>%
  tidyr::gather(key = "growth_year", value = "gdp_growth", -entity) %>%
  dplyr::left_join(country_regions, by = "entity")

gdp_growth_cleaned %>% skimr::skim()

## Skim summary statistics
##  n obs: 15576 
##  n variables: 4 
## 
## ── Variable type:character ────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##     variable missing complete     n min max empty n_unique
##       entity       0    15576 15576   4  52     0      264
##  growth_year       0    15576 15576   4   4     0       59
##       region    3009    12567 15576  10  26     0        7
## 
## ── Variable type:numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##    variable missing complete     n mean   sd  p0    p25  p50 p75  p100
##  gdp_growth    4185    11391 15576  2.1 5.72 -65 -0.046 2.22 4.4 140.5
##      hist
##  ▁▁▇▁▁▁▁▁

# Nest the yearly growth series so that each entity occupies a single row,
# with its growth years held in the `data` list-column.
gdp_growth_nest <- gdp_growth_cleaned %>%
  dplyr::group_by(entity) %>%
  tidyr::nest()

# Attach every growth-year observation to the matching waste record (joined
# on entity name), then keep growth years from 2001 onwards, excluding 2018.
waste_join_growth <- waste_vs_gdp_cleaned %>%
  dplyr::inner_join(gdp_growth_nest, by = "entity") %>%
  # Name the list-column explicitly instead of relying on unnest() guessing
  # which columns to expand.
  tidyr::unnest(data) %>%
  # growth_year is a character column (it was built from column names), so
  # convert it before comparing; comparing it with a numeric literal would
  # silently fall back to a string comparison.
  dplyr::filter(
    as.integer(growth_year) >= 2001L,
    as.integer(growth_year) != 2018L
  )

waste_join_growth %>% skimr::skim()

## Skim summary statistics
##  n obs: 2431 
##  n variables: 8 
## 
## ── Variable type:character ────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##     variable missing complete    n min max empty n_unique
##         code       0     2431 2431   3   3     0      143
##       entity       0     2431 2431   4  24     0      143
##  growth_year       0     2431 2431   4   4     0       17
##       region      34     2397 2431  10  26     0        7
## 
## ── Variable type:numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##          variable missing complete    n       mean      sd      p0
##        gdp_growth     139     2292 2431    2.19    5.43     -62.23
##  plastic_standard       0     2431 2431    0.2     0.31       0.01
##               pop       0     2431 2431    4.1e+07 1.6e+08 9827   
##              year       0     2431 2431 2010       0       2010   
##         p25        p50        p75       p100     hist
##       0.21        2.03    4.13     122.97    ▁▁▇▁▁▁▁▁
##       0.093       0.14    0.25       3.6     ▇▁▁▁▁▁▁▁
##  754493     5788163       2.4e+07    1.3e+09 ▇▁▁▁▁▁▁▁
##    2010        2010    2010       2010       ▁▁▁▇▁▁▁▁

# Fill gaps in each entity's growth series, then drop extreme observations
# and rows without a region.
waste_join_growth_filled <- waste_join_growth %>%
  dplyr::group_by(entity) %>%
  # arrange() ignores the grouping and sorts the whole table by year, which
  # leaves the rows of every entity in chronological order for fill().
  dplyr::arrange(growth_year) %>%
  # "up" copies the next available later value into a missing year.
  tidyr::fill(gdp_growth, .direction = "up") %>%
  # The grouping is only needed for the fill; drop it so the stored result
  # does not carry a hidden per-entity grouping into later steps.
  dplyr::ungroup() %>%
  dplyr::filter(
    abs(gdp_growth) <= 20,
    plastic_standard < 2,
    !is.na(region)
  )
# dplyr::mutate(growth_year = growth_year %>% ISOdate(1, 1) %>% as.Date)

6 GDP growth vs waste in 2010 only

# Restrict to the 2010 growth figures. The outlier thresholds
# (|gdp_growth| <= 20, plastic_standard < 2) were already applied when
# waste_join_growth_filled was built, so they are not repeated here.
# growth_year is a character column, hence the string comparison.
waste_join_growth_filled_2010 <- waste_join_growth_filled %>%
  dplyr::filter(growth_year == "2010")

# Scatter of plastic waste against GDP growth, coloured by region, with a
# single least-squares line fitted across all regions (group = 1).
waste_join_growth_filled_2010 %>%
  ggplot(aes(x = gdp_growth, y = plastic_standard)) +
  geom_point(aes(colour = region), size = 3) +
  geom_smooth(aes(group = 1), method = "lm", se = FALSE) +
  ggsci::scale_colour_d3() +
  # Same axis labels as the animated version of this plot.
  labs(x = "GDP growth per capita",
       y = "Plastic waste per capita (kg/day), in 2010")

7 GDP growth vs waste over (extrapolated) years

# For every growth year, regress plastic waste on GDP growth and keep three
# ready-to-plot text labels: the slope, its p-value and the correlation,
# each rounded to 4 decimal places. The fitted-model table itself is dropped.
waste_join_growth_filled_lm <- waste_join_growth_filled %>%
  dplyr::group_by(growth_year) %>%
  tidyr::nest() %>%
  dplyr::mutate(
    lm_tidy = purrr::map(
      data,
      function(d) broom::tidy(lm(plastic_standard ~ gdp_growth, data = d))
    ),
    # Row 2 of the tidied model is the gdp_growth coefficient
    # (row 1 is the intercept).
    slope_est = paste0(
      "Slope = ",
      round(purrr::map_dbl(lm_tidy, function(fit) fit$estimate[2]), 4)
    ),
    slope_pval = paste0(
      "Slope pvalue = ",
      round(purrr::map_dbl(lm_tidy, function(fit) fit$p.value[2]), 4)
    ),
    corr_est = paste0(
      "Corr = ",
      round(
        purrr::map_dbl(
          data,
          function(d) cor(d$plastic_standard, d$gdp_growth)
        ),
        4
      )
    )
  ) %>%
  dplyr::select(-lm_tidy)

waste_join_growth_filled_lm

## # A tibble: 17 x 5
##    growth_year data            slope_est     slope_pval        corr_est    
##    <chr>       <list>          <chr>         <chr>             <chr>       
##  1 2001        <tibble [132 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
##  2 2002        <tibble [132 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
##  3 2003        <tibble [131 ×… Slope = 4e-04 Slope pvalue = 0… Corr = 0.01…
##  4 2004        <tibble [131 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
##  5 2005        <tibble [133 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
##  6 2006        <tibble [132 ×… Slope = -7e-… Slope pvalue = 0… Corr = -0.02
##  7 2007        <tibble [133 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
##  8 2008        <tibble [133 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
##  9 2009        <tibble [134 ×… Slope = -0.0… Slope pvalue = 1… Corr = -0.3…
## 10 2010        <tibble [134 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
## 11 2011        <tibble [133 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.1…
## 12 2012        <tibble [132 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
## 13 2013        <tibble [132 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
## 14 2014        <tibble [130 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
## 15 2015        <tibble [130 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
## 16 2016        <tibble [131 ×… Slope = 5e-04 Slope pvalue = 0… Corr = 0.01…
## 17 2017        <tibble [128 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…

# Animated scatter of plastic waste against GDP growth with one animation
# state per growth year. A linear fit is drawn per growth year, and the
# per-year slope, p-value and correlation labels are printed in red.
gif <- ggplot(
  waste_join_growth_filled,
  aes(x = gdp_growth, y = plastic_standard)
) +
  geom_point(aes(colour = region, group = 1L), size = 3) +
  geom_smooth(aes(group = growth_year), method = "lm", se = FALSE) +
  # The three labels share x = -10 and are stacked at y = 0.6, 0.5 and 0.4.
  geom_text(
    data = waste_join_growth_filled_lm,
    mapping = aes(x = -10, y = 0.6, label = slope_est),
    colour = "red",
    size = 5
  ) +
  geom_text(
    data = waste_join_growth_filled_lm,
    mapping = aes(x = -10, y = 0.5, label = slope_pval),
    colour = "red",
    size = 5
  ) +
  geom_text(
    data = waste_join_growth_filled_lm,
    mapping = aes(x = -10, y = 0.4, label = corr_est),
    colour = "red",
    size = 5
  ) +
  ggsci::scale_colour_d3() +
  labs(
    title = "Year: {closest_state}",
    x = "GDP growth per capita",
    y = "Plastic waste per capita (kg/day), in 2010"
  ) +
  transition_states(
    growth_year,
    transition_length = 0.1,
    state_length = 0.5
  ) +
  ease_aes("linear")

# Render the animation and write it to disk.
anim_save(
  filename = "gdp_growth_vs_plastic_per_cap.gif",
  animation = gif,
  width = 800
)

8 Session Info

# Print the R version, platform and attached/loaded package versions for this
# session so the analysis can be reproduced.
sessionInfo()

## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggsci_2.9       skimr_1.0.5     gganimate_1.0.3 janitor_1.2.0  
##  [5] forcats_0.4.0   stringr_1.4.0   dplyr_0.8.1     purrr_0.3.2    
##  [9] readr_1.3.1     tidyr_0.8.3     tibble_2.1.1    ggplot2_3.1.1  
## [13] tidyverse_1.2.1
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1         lubridate_1.7.4    lattice_0.20-38   
##  [4] class_7.3-15       prettyunits_1.0.2  png_0.1-7         
##  [7] assertthat_0.2.1   zeallot_0.1.0      digest_0.6.19     
## [10] utf8_1.1.4         R6_2.4.0           cellranger_1.1.0  
## [13] plyr_1.8.4         backports_1.1.4    e1071_1.7-1       
## [16] evaluate_0.13      httr_1.4.0         pillar_1.4.0      
## [19] rlang_0.3.4        progress_1.2.2     lazyeval_0.2.2    
## [22] curl_3.3           readxl_1.3.1       rstudioapi_0.10   
## [25] gifski_0.8.6       rmarkdown_1.12     labeling_0.3      
## [28] munsell_0.5.0      broom_0.5.2        compiler_3.6.0    
## [31] modelr_0.1.4       xfun_0.7           pkgconfig_2.0.2   
## [34] htmltools_0.3.6    tidyselect_0.2.5   lpSolve_5.6.13    
## [37] fansi_0.4.0        sf_0.7-4           crayon_1.3.4      
## [40] withr_2.1.2        grid_3.6.0         DBI_1.0.0         
## [43] nlme_3.1-140       jsonlite_1.6       gtable_0.3.0      
## [46] magrittr_1.5       units_0.6-3        scales_1.0.0      
## [49] KernSmooth_2.23-15 cli_1.1.0          stringi_1.4.3     
## [52] farver_1.1.0       snakecase_0.10.0   xml2_1.2.0        
## [55] generics_0.0.2     vctrs_0.1.0        transformr_0.1.1  
## [58] tools_3.6.0        glue_1.3.1         tweenr_1.0.1      
## [61] hms_0.4.2          yaml_2.2.0         colorspace_1.4-1  
## [64] classInt_0.3-3     rvest_0.3.4        knitr_1.23        
## [67] haven_2.1.0

Tidytuesday

Kevin Wang

Initiated on 2019 May 21, compiled on 2019 May 21

1 Summary

2 Loading packages

3 Loading data

4 Cleaning up `waste_vs_gdp`

5 Loading World Bank data

6 GDP growth vs waste in 2010 only

7 GDP growth vs waste over (extrapolated) years

8 Session Info

Tidytuesday

Kevin Wang

Initiated on 2019 May 21, compiled on 2019 May 21

1 Summary

2 Loading packages

3 Loading data

4 Cleaning up waste_vs_gdp

5 Loading World Bank data

6 GDP growth vs waste in 2010 only

7 GDP growth vs waste over (extrapolated) years

8 Session Info

4 Cleaning up `waste_vs_gdp`