library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6.9000
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(funFEM)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: fda
## Loading required package: splines
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: fds
## Loading required package: rainbow
## Loading required package: pcaPP
## Loading required package: RCurl
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
##
## complete
##
## Attaching package: 'fda'
## The following object is masked from 'package:graphics':
##
## matplot
## Loading required package: elasticnet
## Loading required package: lars
## Loaded lars 1.2
library(ggrepel)
# Default look for every ggplot below: classic theme at base size 18 with the
# legend placed underneath the panel.
theme_set(
  theme_classic(base_size = 18) +
    theme(legend.position = "bottom")
)

# Keep warnings and messages out of the knitted chunk output.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE
)
# Alternative loader (not used):
# library(tidytuesdayR)
# tuesdata <- tidytuesdayR::tt_load('2021-03-23')

# Vote table read straight from the TidyTuesday repository; the parsed columns
# are rcid, country, country_code and vote.
unvotes <- readr::read_csv(
  file = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/unvotes.csv'
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## rcid = col_double(),
## country = col_character(),
## country_code = col_character(),
## vote = col_character()
## )
# Roll-call metadata keyed by rcid; `date` is parsed as a Date column.
roll_calls <- readr::read_csv(
  file = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/roll_calls.csv'
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## rcid = col_double(),
## session = col_double(),
## importantvote = col_double(),
## date = col_date(format = ""),
## unres = col_character(),
## amend = col_double(),
## para = col_double(),
## short = col_character(),
## descr = col_character()
## )
# Issue labels keyed by rcid (columns: rcid, short_name, issue).
issues <- readr::read_csv(
  file = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/issues.csv'
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## rcid = col_double(),
## short_name = col_character(),
## issue = col_character()
## )
# unvotes %>% dplyr::filter(country == "United Kingdom")

# Keep only the votes cast under the five country codes US, GB, FR, RU and CN.
unvotes_5 <- dplyr::filter(
  unvotes,
  country_code %in% c("US", "GB", "FR", "RU", "CN")
)

# Attach roll-call metadata and issue labels by rcid, then derive the calendar
# year from the roll-call date. Both joins are inner joins, so votes whose rcid
# is missing from either table are dropped.
# NOTE(review): if `issues` holds more than one row per rcid, the second join
# repeats each matching vote once per issue -- confirm before counting votes.
unvotes_5_joined <- unvotes_5 %>%
  inner_join(roll_calls, by = "rcid") %>%
  inner_join(issues, by = "rcid") %>%
  dplyr::mutate(year = lubridate::year(date))
# Share of each vote type per country code and year.
# `n` is the number of joined rows for a (year, country_code, vote) combination
# and `percent` is that count divided by the total for the (year, country_code)
# pair, so the shares of one pair sum to 1.
# NOTE(review): counts are taken on the joined table; if the issue join
# duplicated votes, those duplicates are counted here as well -- confirm.
unvotes_5_percent <- unvotes_5_joined %>%
  dplyr::count(year, country_code, vote) %>%
  group_by(year, country_code) %>%
  dplyr::mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  # Relevel once on the whole column, after ungrouping. Releveling inside the
  # grouped mutate builds a separate factor per group, and the combined level
  # order then depends on which vote types happen to occur in the first groups.
  dplyr::mutate(vote = fct_relevel(vote, "no", "abstain"))
# Yearly vote share as one line per country code, with one panel row per vote
# type.
ggplot(
  unvotes_5_percent,
  aes(x = year, y = percent, colour = country_code)
) +
  geom_line() +
  facet_wrap(~vote, nrow = 3)
# Stacked yearly vote shares, one panel per country code; the y axis is
# formatted as a percentage.
ggplot(
  unvotes_5_percent,
  aes(x = year, y = percent, fill = vote)
) +
  geom_col() +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(~country_code, nrow = 3)
# Recompute the vote shares, this time split by issue as well.
# This overwrites the earlier `unvotes_5_percent`: `percent` is now the share
# of each vote type within a (year, country_code, issue) combination.
unvotes_5_percent <- unvotes_5_joined %>%
  dplyr::count(year, country_code, vote, issue) %>%
  group_by(year, country_code, issue) %>%
  dplyr::mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  # Relevel once on the whole column, after ungrouping. Releveling inside the
  # grouped mutate builds a separate factor per group, and the combined level
  # order then depends on which vote types happen to occur in the first groups.
  dplyr::mutate(vote = fct_relevel(vote, "no", "abstain"))
# Yearly vote share per country code, one panel per issue (rows) and vote type
# (columns).
# At this point `unvotes_5_percent` has one row per issue, so faceting by vote
# alone would put several issues on the same line and make it zigzag between
# their values within each year. Faceting by issue keeps one value per year on
# every line.
unvotes_5_percent %>%
  ggplot(aes(
    x = year,
    y = percent,
    colour = country_code
  )) +
  geom_line() +
  facet_grid(issue ~ vote)
# Stacked yearly vote shares in a grid: one row per issue, one column per
# country code; the y axis is formatted as a percentage.
ggplot(
  unvotes_5_percent,
  aes(x = year, y = percent, fill = vote)
) +
  geom_col() +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Set1") +
  facet_grid(issue ~ country_code)
We first calculate, for each country and year, the percentage of its votes that were “yes”. We then keep the 100 countries with the most recorded votes, plus one hand-selected addition (China).
# All countries: attach roll-call metadata and issue labels by rcid, derive the
# calendar year of each roll call, and keep only the columns used below.
# NOTE(review): if `issues` holds more than one row per rcid, each vote appears
# once per issue here and is counted that many times downstream -- confirm.
unvotes_joined <- unvotes %>%
  inner_join(roll_calls, by = "rcid") %>%
  inner_join(issues, by = "rcid") %>%
  dplyr::mutate(year = lubridate::year(date)) %>%
  dplyr::select(country, year, issue, vote)
# Share of "yes" rows per country and year.
# Country-years with no "yes" row are absent from the result, and the result
# stays grouped by year and country.
unvotes_joined_percent <- unvotes_joined %>%
  # dplyr::filter(issue == "Colonialism") %>%
  dplyr::count(year, country, vote) %>%
  group_by(year, country) %>%
  dplyr::mutate(percent = n / sum(n)) %>%
  dplyr::filter(vote == "yes") %>%
  dplyr::select(-n, -vote)
# The (up to) 100 countries with the most rows in `unvotes_joined`, ordered
# from most to fewest; `n` holds the row count.
selected_countries <- unvotes_joined %>%
  dplyr::count(country) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  dplyr::slice_head(n = 100)
# Wide table of "yes" shares: one row per year, one column per kept country
# (the selected countries plus "China").
# Country-years absent from `unvotes_joined_percent` are filled with 0 by
# pivot_wider() itself. This replaces a follow-up `across()` call without
# `.cols`, which only skipped `year` because the input happened to be grouped.
# NOTE(review): a 0 does not distinguish "no yes votes" from "no votes recorded
# that year" -- confirm this is acceptable for the clustering below.
unvotes_joined_percent_wide <- unvotes_joined_percent %>%
  ungroup() %>%
  dplyr::filter(country %in% c(selected_countries$country, "China")) %>%
  pivot_wider(
    names_from = "country",
    values_from = "percent",
    values_fill = 0
  )
# Numeric matrix version of the wide table: `year` becomes the row names and
# every remaining column (one per country) becomes a matrix column.
unvotes_joined_percent_wide_mat <- as.matrix(
  tibble::column_to_rownames(
    as.data.frame(unvotes_joined_percent_wide),
    var = "year"
  )
)
# Hierarchical clustering of countries: the matrix is transposed so that each
# country is one observation, and distances between countries are computed on
# their yearly "yes" shares. Method "ave" is passed through as written.
hc <- hclust(
  d = dist(t(unvotes_joined_percent_wide_mat)),
  method = "ave"
)
# Draw the dendrogram.
plot(hc)
# Fix the RNG state so the steps below are reproducible.
set.seed(20210624)

# Cubic (norder = 4) B-spline basis with 20 basis functions over [0, n_rows].
# NOTE(review): the basis range starts at 0 while the curves are observed at
# 1..n_rows -- confirm the extra unit on the left is intended.
basis <- create.bspline.basis(
  rangeval = c(0, nrow(unvotes_joined_percent_wide_mat)),
  nbasis = 20,
  norder = 4
)

# Smooth every matrix column (one per country) onto the basis and keep the
# resulting functional data object.
fdobj <- smooth.basis(
  argvals = seq_len(nrow(unvotes_joined_percent_wide_mat)),
  y = unvotes_joined_percent_wide_mat,
  fdParobj = basis
)$fd

# Cluster the smoothed curves into 12 groups with the "AkjBk" model.
res <- funFEM(fdobj, K = 12, model = "AkjBk")
# Plotting table: yearly "yes" shares with the funFEM cluster of each country
# attached. Countries that are not columns of the clustered matrix are dropped
# by the inner join.
unvotes_joined_percent_plotdf <- unvotes_joined_percent %>%
  inner_join(
    tibble(
      country = colnames(unvotes_joined_percent_wide_mat),
      cluster = res$cls
    ),
    by = "country"
  ) %>%
  group_by(cluster, country) %>%
  dplyr::mutate(
    # label = ifelse(year == max(year), country, NA)
    # Label each country at one randomly chosen year of its own series.
    # The year is picked by index: sample(year, 1) on a single-element numeric
    # vector would instead draw from 1:year and almost never match, leaving
    # that country without a label.
    label = ifelse(
      year == year[sample.int(length(year), 1L)],
      country,
      NA
    )
  ) %>%
  # Drop the grouping so later steps do not inherit it.
  ungroup()
# One thin line per country showing its yearly "yes" share, one panel per
# cluster; each country name is drawn at the row where `label` is set.
ggplot(
  unvotes_joined_percent_plotdf,
  aes(x = year, y = percent, group = country, label = label)
) +
  geom_line(size = 0.1) +
  ggrepel::geom_text_repel(
    colour = "#3079ff",
    fontface = "bold",
    size = 4
  ) +
  facet_wrap(~cluster, labeller = label_both)
# Same plot restricted to clusters 3, 4, 5, 6, 8 and 12, with the y axis
# formatted as a percentage.
# NOTE(review): these cluster numbers come from the fit above, so they
# presumably need revisiting whenever the seed, the data or the model
# settings change.
ggplot(
  dplyr::filter(
    unvotes_joined_percent_plotdf,
    cluster %in% c(3, 4, 5, 6, 8, 12)
  ),
  aes(x = year, y = percent, group = country, label = label)
) +
  geom_line(size = 0.1) +
  ggrepel::geom_text_repel(
    colour = "#3079ff",
    fontface = "bold",
    size = 4
  ) +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(~cluster, labeller = label_both)