This data originates from the TidyTuesday challenge
(https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-05-21).
There are three datasets; however, I will focus on only one of them: waste_vs_gdp.
I speculate that the amount of plastic waste is correlated not only with GDP but also with GDP growth (https://twitter.com/search?q=tidytuesday%20plastic&src=typd). Since GDP growth is not provided, I have decided to download that data from the World Bank.
library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(gganimate)
library(skimr)
##
## Attaching package: 'skimr'
## The following object is masked from 'package:stats':
##
## filter
library(ggsci)
# Default look for every plot below: classic theme at base font size 18,
# with the legend placed underneath the panel.
theme_set(
  theme_classic(base_size = 18) +
    theme(legend.position = "bottom")
)
# Coastal population vs mismanaged plastic waste, read straight from the
# TidyTuesday repository; clean_names() normalises the column headers.
coast_vs_waste <- janitor::clean_names(
  readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/coastal-population-vs-mismanaged-plastic.csv"
  )
)
## Parsed with column specification:
## cols(
## Entity = col_character(),
## Code = col_character(),
## Year = col_double(),
## `Mismanaged plastic waste (tonnes)` = col_double(),
## `Coastal population` = col_double(),
## `Total population (Gapminder)` = col_double()
## )
# Per-capita mismanaged plastic waste vs GDP per capita, read from the
# TidyTuesday repository; clean_names() normalises the column headers.
mismanaged_vs_gdp <- janitor::clean_names(
  readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/per-capita-mismanaged-plastic-waste-vs-gdp-per-capita.csv"
  )
)
## Parsed with column specification:
## cols(
## Entity = col_character(),
## Code = col_character(),
## Year = col_double(),
## `Per capita mismanaged plastic waste (kilograms per person per day)` = col_double(),
## `GDP per capita, PPP (constant 2011 international $) (Rate)` = col_double(),
## `Total population (Gapminder)` = col_double()
## )
# Per-capita plastic waste vs GDP per capita -- the table the rest of the
# analysis is built on. clean_names() turns the long headers into the
# snake_case column names used further down.
waste_vs_gdp <- janitor::clean_names(
  readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/per-capita-plastic-waste-vs-gdp-per-capita.csv"
  )
)
## Parsed with column specification:
## cols(
## Entity = col_character(),
## Code = col_character(),
## Year = col_double(),
## `Per capita plastic waste (kilograms per person per day)` = col_double(),
## `GDP per capita, PPP (constant 2011 international $) (constant 2011 international $)` = col_double(),
## `Total population (Gapminder)` = col_double()
## )
# Show the raw table for a first look.
waste_vs_gdp

# For each entity, count the rows whose plastic-waste value is missing and
# list the entities with the most gaps first.
waste_vs_gdp_missing <- waste_vs_gdp %>%
  group_by(entity) %>%
  summarise(
    nMissing = sum(is.na(per_capita_plastic_waste_kilograms_per_person_per_day))
  ) %>%
  dplyr::arrange(desc(nMissing))
# Keep the identifying columns plus plastic waste and population under shorter
# names, then drop every row with a missing value in any of the kept columns.
waste_vs_gdp_cleaned <- waste_vs_gdp %>%
  dplyr::select(
    entity,
    code,
    year,
    plastic_standard = per_capita_plastic_waste_kilograms_per_person_per_day,
    pop = total_population_gapminder
  ) %>%
  na.omit()

# Show the cleaned table.
waste_vs_gdp_cleaned
## # A tibble: 175 x 5
## entity code year plastic_standard pop
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Albania ALB 2010 0.069 3204284
## 2 Algeria DZA 2010 0.144 35468208
## 3 Angola AGO 2010 0.062 19081912
## 4 Anguilla AIA 2010 0.252 15358
## 5 Antigua and Barbuda ATG 2010 0.66 88710
## 6 Argentina ARG 2010 0.183 40412376
## 7 Aruba ABW 2010 0.252 107488
## 8 Australia AUS 2010 0.112 22268384
## 9 Bahamas BHS 2010 0.39 342877
## 10 Bahrain BHR 2010 0.132 1261835
## # … with 165 more rows
# GDP-per-capita growth export downloaded from the World Bank. The first four
# lines of the file are skipped so that parsing starts at the header row.
gdp_growth_raw <- readr::read_csv(
  file = "API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_10576663/API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_10576663.csv",
  skip = 4
)
## Warning: Missing column names filled in: 'X64' [64]
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Country Name` = col_character(),
## `Country Code` = col_character(),
## `Indicator Name` = col_character(),
## `Indicator Code` = col_character(),
## `1960` = col_logical(),
## `2018` = col_logical(),
## X64 = col_logical()
## )
## See spec(...) for full column specifications.
# Country metadata shipped with the growth export: keep only the display name
# (renamed to `entity`, the join key used later) and the region.
country_regions <- readr::read_csv(
  "API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_10576663/Metadata_Country_API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_10576663.csv"
) %>%
  dplyr::select(entity = TableName, region = Region)
## Warning: Missing column names filled in: 'X6' [6]
## Parsed with column specification:
## cols(
## `Country Code` = col_character(),
## Region = col_character(),
## IncomeGroup = col_character(),
## SpecialNotes = col_character(),
## TableName = col_character(),
## X6 = col_logical()
## )
# Reshape the growth table from one column per year to one row per
# entity-year, then attach each entity's region.
gdp_growth_cleaned <- gdp_growth_raw %>%
  # Only the country name and the yearly columns are kept; `Country Code`
  # is dropped (renaming it to `code` was considered and left out).
  dplyr::select(entity = `Country Name`, `1960`:`2018`) %>%
  tidyr::gather(key = "growth_year", value = "gdp_growth", -entity) %>%
  dplyr::left_join(country_regions, by = "entity")

# Summary statistics of the long-format growth table.
gdp_growth_cleaned %>% skimr::skim()
## Skim summary statistics
## n obs: 15576
## n variables: 4
##
## ── Variable type:character ────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n min max empty n_unique
## entity 0 15576 15576 4 52 0 264
## growth_year 0 15576 15576 4 4 0 59
## region 3009 12567 15576 10 26 0 7
##
## ── Variable type:numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75 p100
## gdp_growth 4185 11391 15576 2.1 5.72 -65 -0.046 2.22 4.4 140.5
## hist
## ▁▁▇▁▁▁▁▁
# One row per growth-table entity, with that entity's yearly growth series
# (and region) stored in a list-column named `data`.
gdp_growth_nest <- gdp_growth_cleaned %>%
  group_by(entity) %>%
  tidyr::nest()

# Attach the growth series to each waste record and expand it back to one row
# per entity and growth year.
# NOTE(review): the join key is the entity *name*, so entities spelled
# differently in the two sources are silently dropped by inner_join().
# Joining on the country code may retain more countries -- confirm.
waste_join_growth <- waste_vs_gdp_cleaned %>%
  inner_join(gdp_growth_nest, by = "entity") %>%
  # Name the list-column explicitly: a bare unnest() is deprecated in
  # tidyr >= 1.0, while unnest(data) works in old and new versions alike.
  tidyr::unnest(data) %>%
  # growth_year is a character column (it came from column headers), so
  # convert it before comparing rather than relying on R coercing the numbers
  # to strings and comparing them lexically.
  dplyr::filter(
    as.integer(growth_year) >= 2001L,
    as.integer(growth_year) != 2018L
  )

# Summary statistics of the joined table.
waste_join_growth %>% skimr::skim()
## Skim summary statistics
## n obs: 2431
## n variables: 8
##
## ── Variable type:character ────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n min max empty n_unique
## code 0 2431 2431 3 3 0 143
## entity 0 2431 2431 4 24 0 143
## growth_year 0 2431 2431 4 4 0 17
## region 34 2397 2431 10 26 0 7
##
## ── Variable type:numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0
## gdp_growth 139 2292 2431 2.19 5.43 -62.23
## plastic_standard 0 2431 2431 0.2 0.31 0.01
## pop 0 2431 2431 4.1e+07 1.6e+08 9827
## year 0 2431 2431 2010 0 2010
## p25 p50 p75 p100 hist
## 0.21 2.03 4.13 122.97 ▁▁▇▁▁▁▁▁
## 0.093 0.14 0.25 3.6 ▇▁▁▁▁▁▁▁
## 754493 5788163 2.4e+07 1.3e+09 ▇▁▁▁▁▁▁▁
## 2010 2010 2010 2010 ▁▁▁▇▁▁▁▁
# Fill gaps in each entity's growth series, then drop rows that would distort
# the fits.
waste_join_growth_filled <- waste_join_growth %>%
  group_by(entity) %>%
  dplyr::arrange(growth_year) %>%
  # Rows are in year order, so "up" fills a missing growth value with the
  # next later year's value for the same entity.
  tidyr::fill(gdp_growth, .direction = "up") %>%
  # Drop the grouping once the per-entity fill is done so that later steps
  # do not inherit it by accident.
  dplyr::ungroup() %>%
  # Keep rows with absolute growth of at most 20, waste below 2 and a known
  # region; rows whose growth is still missing are dropped here as well.
  dplyr::filter(abs(gdp_growth) <= 20, plastic_standard < 2, !is.na(region))
# Alternative left disabled: convert growth_year to a Date
# dplyr::mutate(growth_year = growth_year %>% ISOdate(1, 1) %>% as.Date)

# Growth year 2010 only. The outlier filters were already applied above, so
# they are not repeated; growth_year is character, hence the string literal.
waste_join_growth_filled_2010 <- waste_join_growth_filled %>%
  dplyr::filter(growth_year == "2010")

# Static scatter for 2010: points coloured by region, with a single
# least-squares line fitted across all regions.
waste_join_growth_filled_2010 %>%
  ggplot(aes(x = gdp_growth, y = plastic_standard)) +
  geom_point(aes(colour = region), size = 3) +
  geom_smooth(aes(group = 1), method = "lm", se = FALSE) +
  ggsci::scale_color_d3()
# For every growth year, regress plastic waste on GDP growth and build the
# text labels (slope, slope p-value, correlation) shown in the animation.
waste_join_growth_filled_lm <- waste_join_growth_filled %>%
  group_by(growth_year) %>%
  tidyr::nest() %>%
  # Newer tidyr keeps the grouping after nest(); drop it so the mutate()
  # below behaves the same in every version.
  dplyr::ungroup() %>%
  dplyr::mutate(
    # Coefficient table of the per-year linear model.
    lm_tidy = purrr::map(
      .x = data,
      .f = ~ broom::tidy(lm(plastic_standard ~ gdp_growth, data = .x))
    ),
    # Row 2 of the coefficient table is the gdp_growth term (row 1 is the
    # intercept). sprintf("%.4f") keeps fixed notation, whereas round() followed
    # by paste0() printed small values as e.g. "4e-04".
    slope_est = sprintf(
      "Slope = %.4f",
      purrr::map_dbl(lm_tidy, .f = ~ .x$estimate[2])
    ),
    slope_pval = sprintf(
      "Slope pvalue = %.4f",
      purrr::map_dbl(lm_tidy, .f = ~ .x$p.value[2])
    ),
    # Correlation between waste and growth within the year.
    corr_est = sprintf(
      "Corr = %.4f",
      purrr::map_dbl(
        .x = data,
        .f = ~ cor(.x$plastic_standard, .x$gdp_growth)
      )
    )
  ) %>%
  # The coefficient tables were only needed to build the labels.
  dplyr::select(-lm_tidy)
# tidyr::unnest()

# Show the per-year summary table.
waste_join_growth_filled_lm
## # A tibble: 17 x 5
## growth_year data slope_est slope_pval corr_est
## <chr> <list> <chr> <chr> <chr>
## 1 2001 <tibble [132 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
## 2 2002 <tibble [132 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
## 3 2003 <tibble [131 ×… Slope = 4e-04 Slope pvalue = 0… Corr = 0.01…
## 4 2004 <tibble [131 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
## 5 2005 <tibble [133 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
## 6 2006 <tibble [132 ×… Slope = -7e-… Slope pvalue = 0… Corr = -0.02
## 7 2007 <tibble [133 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
## 8 2008 <tibble [133 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
## 9 2009 <tibble [134 ×… Slope = -0.0… Slope pvalue = 1… Corr = -0.3…
## 10 2010 <tibble [134 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
## 11 2011 <tibble [133 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.1…
## 12 2012 <tibble [132 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
## 13 2013 <tibble [132 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.2…
## 14 2014 <tibble [130 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
## 15 2015 <tibble [130 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
## 16 2016 <tibble [131 ×… Slope = 5e-04 Slope pvalue = 0… Corr = 0.01…
## 17 2017 <tibble [128 ×… Slope = -0.0… Slope pvalue = 0… Corr = -0.0…
# Animated scatter of GDP growth against plastic waste. Each growth year is one
# animation state, drawn with that year's least-squares line and three red
# labels (slope, slope p-value, correlation) taken from the per-year summary.
# Options tried and left disabled: theme(title = slope_est), shadow_mark()
# (make points stay), facet_wrap(~growth_year).
gif <- ggplot(
  data = waste_join_growth_filled,
  mapping = aes(x = gdp_growth, y = plastic_standard)
) +
  geom_point(mapping = aes(colour = region, group = 1L), size = 3) +
  geom_smooth(
    mapping = aes(group = growth_year),
    method = "lm",
    se = FALSE
  ) +
  geom_text(
    data = waste_join_growth_filled_lm,
    mapping = aes(x = -10, y = 0.6, label = slope_est),
    colour = "red",
    size = 5
  ) +
  geom_text(
    data = waste_join_growth_filled_lm,
    mapping = aes(x = -10, y = 0.5, label = slope_pval),
    colour = "red",
    size = 5
  ) +
  geom_text(
    data = waste_join_growth_filled_lm,
    mapping = aes(x = -10, y = 0.4, label = corr_est),
    colour = "red",
    size = 5
  ) +
  ggsci::scale_colour_d3() +
  transition_states(
    states = growth_year,
    transition_length = 0.1,
    state_length = 0.5
  ) +
  labs(
    title = "Year: {closest_state}",
    x = "GDP growth per capita",
    y = "Plastic waste per capita (kg/day), in 2010"
  ) +
  ease_aes("linear")

# Render the animation and write it to disk; `width` is forwarded to the
# renderer.
anim_save(
  filename = "gdp_growth_vs_plastic_per_cap.gif",
  animation = gif,
  width = 800
)
# Record the R version, platform and package versions used for this run so
# the analysis can be reproduced.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggsci_2.9 skimr_1.0.5 gganimate_1.0.3 janitor_1.2.0
## [5] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.1 purrr_0.3.2
## [9] readr_1.3.1 tidyr_0.8.3 tibble_2.1.1 ggplot2_3.1.1
## [13] tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 lubridate_1.7.4 lattice_0.20-38
## [4] class_7.3-15 prettyunits_1.0.2 png_0.1-7
## [7] assertthat_0.2.1 zeallot_0.1.0 digest_0.6.19
## [10] utf8_1.1.4 R6_2.4.0 cellranger_1.1.0
## [13] plyr_1.8.4 backports_1.1.4 e1071_1.7-1
## [16] evaluate_0.13 httr_1.4.0 pillar_1.4.0
## [19] rlang_0.3.4 progress_1.2.2 lazyeval_0.2.2
## [22] curl_3.3 readxl_1.3.1 rstudioapi_0.10
## [25] gifski_0.8.6 rmarkdown_1.12 labeling_0.3
## [28] munsell_0.5.0 broom_0.5.2 compiler_3.6.0
## [31] modelr_0.1.4 xfun_0.7 pkgconfig_2.0.2
## [34] htmltools_0.3.6 tidyselect_0.2.5 lpSolve_5.6.13
## [37] fansi_0.4.0 sf_0.7-4 crayon_1.3.4
## [40] withr_2.1.2 grid_3.6.0 DBI_1.0.0
## [43] nlme_3.1-140 jsonlite_1.6 gtable_0.3.0
## [46] magrittr_1.5 units_0.6-3 scales_1.0.0
## [49] KernSmooth_2.23-15 cli_1.1.0 stringi_1.4.3
## [52] farver_1.1.0 snakecase_0.10.0 xml2_1.2.0
## [55] generics_0.0.2 vctrs_0.1.0 transformr_0.1.1
## [58] tools_3.6.0 glue_1.3.1 tweenr_1.0.1
## [61] hms_0.4.2 yaml_2.2.0 colorspace_1.4-1
## [64] classInt_0.3-3 rvest_0.3.4 knitr_1.23
## [67] haven_2.1.0