library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3          ✓ purrr   0.3.4     
## ✓ tibble  3.1.2          ✓ dplyr   1.0.6.9000
## ✓ tidyr   1.1.3          ✓ stringr 1.4.0     
## ✓ readr   1.4.0          ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(funFEM)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: fda
## Loading required package: splines
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: fds
## Loading required package: rainbow
## Loading required package: pcaPP
## Loading required package: RCurl
## 
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
## 
##     complete
## 
## Attaching package: 'fda'
## The following object is masked from 'package:graphics':
## 
##     matplot
## Loading required package: elasticnet
## Loading required package: lars
## Loaded lars 1.2
library(ggrepel)

# Default plot theme for the whole document: classic theme with base font
# size 18 and the legend placed at the bottom.
theme_set(
  theme_classic(base_size = 18) +
    theme(legend.position = "bottom")
)

# Turn off messages and warnings for every knitr chunk.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

# library(tidytuesdayR)
# tuesdata <- tidytuesdayR::tt_load('2021-03-23')

# Read the three TidyTuesday (2021-03-23) tables directly from GitHub.
# Column types are declared explicitly (they match what readr previously
# guessed) so that a change in the upstream files surfaces as parsing problems
# instead of silently changing a column's type, and so that no column
# specification message is printed.

# unvotes: rcid, country, country_code, vote.
unvotes <- readr::read_csv(
  'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/unvotes.csv',
  col_types = readr::cols(
    rcid = readr::col_double(),
    country = readr::col_character(),
    country_code = readr::col_character(),
    vote = readr::col_character()
  )
)

# roll_calls: metadata for each roll call, keyed by rcid.
roll_calls <- readr::read_csv(
  'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/roll_calls.csv',
  col_types = readr::cols(
    rcid = readr::col_double(),
    session = readr::col_double(),
    importantvote = readr::col_double(),
    date = readr::col_date(format = ""),
    unres = readr::col_character(),
    amend = readr::col_double(),
    para = readr::col_double(),
    short = readr::col_character(),
    descr = readr::col_character()
  )
)

# issues: issue labels, keyed by rcid.
issues <- readr::read_csv(
  'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/issues.csv',
  col_types = readr::cols(
    rcid = readr::col_double(),
    short_name = readr::col_character(),
    issue = readr::col_character()
  )
)

1 The 5 permanent members of the UN Security Council

# unvotes %>% dplyr::filter(country == "United Kingdom")

# Keep only the rows whose country code is one of US, GB, FR, RU or CN.
unvotes_5 <- dplyr::filter(
  unvotes,
  country_code %in% c("US", "GB", "FR", "RU", "CN")
)

# Attach the roll-call and issue columns (matched on `rcid`) and derive the
# calendar year from the roll-call `date`.
unvotes_5_joined <- unvotes_5 %>%
  inner_join(roll_calls, by = "rcid") %>%
  inner_join(issues, by = "rcid") %>%
  dplyr::mutate(year = lubridate::year(date))

1.1 Aggregated voting patterns of the 5 countries

# Share of each vote type per country and year, aggregated over all issues.
#
# `unvotes_5_joined` has already been joined to `issues`, so if `issues` holds
# more than one row for a roll call, that roll call appears several times per
# country. Collapse back to one row per roll call and country before counting
# so each vote is tallied once.
# NOTE(review): roll calls without any row in `issues` were already dropped by
# the inner join upstream and are therefore not part of these percentages.
unvotes_5_percent <- unvotes_5_joined %>%
  dplyr::distinct(rcid, country_code, year, vote) %>%
  dplyr::count(year, country_code, vote) %>%
  group_by(year, country_code) %>%
  dplyr::mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  # Relevel on the whole column, not inside each group: a group that lacks a
  # vote type would otherwise be releveled on an incomplete set of levels.
  dplyr::mutate(vote = fct_relevel(vote, "no", "abstain"))

# One panel per vote type; one line per country.
unvotes_5_percent %>%
  ggplot(aes(x = year,
             y = percent,
             colour = country_code)) +
  geom_line() +
  facet_wrap(~vote, nrow = 3)

# One panel per country; stacked bars show the yearly mix of vote types.
unvotes_5_percent %>%
  ggplot(aes(x = year,
             y = percent,
             fill = vote)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(~country_code, nrow = 3)

1.2 Voting patterns of the 5 countries, split by issues

# Share of each vote type per country, year and issue.
# Note: this overwrites the `unvotes_5_percent` object built for the
# aggregated view; percentages now sum to 1 within (year, country, issue).
unvotes_5_percent <- unvotes_5_joined %>%
  dplyr::count(year, country_code, issue, vote) %>%
  group_by(year, country_code, issue) %>%
  dplyr::mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  # Relevel on the whole column, not inside each group: a group that lacks a
  # vote type would otherwise be releveled on an incomplete set of levels.
  dplyr::mutate(vote = fct_relevel(vote, "no", "abstain"))

# The data has one row per (year, country, issue, vote), so `issue` must be
# part of the faceting; otherwise each country's line would connect several
# y values per year (one per issue) and zig-zag between them.
unvotes_5_percent %>%
  ggplot(aes(x = year,
             y = percent,
             colour = country_code)) +
  geom_line() +
  facet_grid(issue ~ vote)

# Rows = issues, columns = countries; stacked bars show the yearly mix of
# vote types.
unvotes_5_percent %>%
  ggplot(aes(x = year,
             y = percent,
             fill = vote)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Set1") +
  facet_grid(issue ~ country_code)

2 Clustering on 100 countries

We first calculate, for each country and year, the percentage of its votes that were “yes”. We then keep the 100 countries with the most recorded votes, plus China, which is added by hand.

# All votes with roll-call and issue information attached, reduced to the
# columns used below.
# NOTE(review): the inner join with `issues` repeats a vote once per matching
# issue row and drops roll calls with no issue row, so the counts below are
# per (vote, issue) pair rather than per roll call -- confirm this is intended.
unvotes_joined <- unvotes %>%
  inner_join(roll_calls, by = "rcid") %>%
  inner_join(issues, by = "rcid") %>%
  dplyr::mutate(year = lubridate::year(date)) %>%
  dplyr::select(country, year, issue, vote)

# Per country and year: the fraction of rows whose vote is "yes".
# The result stays grouped by (year, country), as before.
unvotes_joined_percent <- unvotes_joined %>%
  # dplyr::filter(issue == "Colonialism") %>%
  dplyr::count(year, country, vote) %>%
  group_by(year, country) %>%
  dplyr::mutate(percent = n / sum(n)) %>%
  dplyr::filter(vote == "yes") %>%
  dplyr::select(-n, -vote)

# The 100 countries with the largest number of rows in `unvotes_joined`.
selected_countries <- unvotes_joined %>%
  dplyr::count(country) %>%
  dplyr::arrange(desc(n)) %>%
  dplyr::slice_head(n = 100)

# Wide layout: one row per year, one column per selected country (plus China).
# Missing (year, country) cells are filled with 0 directly in `pivot_wider()`.
# This replaces `mutate(across(.fns = ...))`, which relied on the leftover
# grouping by `year` to keep `year` out of the coalesce and on the implicit
# `.cols` default of `across()`.
# NOTE(review): a 0 here means "no 'yes' row for that country in that year";
# this includes years with no rows at all for the country.
unvotes_joined_percent_wide <- unvotes_joined_percent %>%
  ungroup() %>%
  dplyr::filter(country %in% c(selected_countries$country, "China")) %>%
  pivot_wider(names_from = "country",
              values_from = "percent",
              values_fill = 0)

# Numeric matrix with years as row names and countries as columns.
unvotes_joined_percent_wide_mat <- unvotes_joined_percent_wide %>%
  as.data.frame() %>%
  tibble::column_to_rownames(var = "year") %>%
  as.matrix()

# Average-linkage hierarchical clustering of countries (columns), using the
# default distance of `dist()` between their yearly "yes" shares.
hc <- hclust(dist(t(unvotes_joined_percent_wide_mat)), method = "average")
plot(hc)

set.seed(20210624)

# Smooth each country's yearly series with a cubic B-spline basis (20 basis
# functions) and cluster the resulting curves with funFEM into K = 12 groups.
# NOTE(review): the basis range starts at 0 while the evaluation points are
# 1..nrow; left unchanged so the fitted clusters stay the same.
n_years <- nrow(unvotes_joined_percent_wide_mat)
basis <- create.bspline.basis(c(0, n_years), nbasis = 20, norder = 4) # norder=4 : cubic spline
fdobj <- smooth.basis(argvals = seq_len(n_years),
                      y = unvotes_joined_percent_wide_mat,
                      basis)$fd

res <- funFEM(fdobj, K = 12, model = "AkjBk")

# Attach each country's cluster id and choose one random year per country at
# which the country name is drawn as a label.
unvotes_joined_percent_plotdf <- unvotes_joined_percent %>%
  inner_join(tibble(
    country = colnames(unvotes_joined_percent_wide_mat),
    cluster = res$cls),
    by = "country") %>%
  group_by(cluster, country) %>%
  dplyr::mutate(
    # label = ifelse(year == max(year), country, NA)
    # Sample a row index rather than the value: `sample(year, 1)` would draw
    # from 1:year when a country has a single row, leaving it unlabelled.
    label = dplyr::if_else(year == year[sample.int(dplyr::n(), 1)],
                           country,
                           NA_character_)) %>%
  ungroup()


# All clusters: one thin line per country, one panel per cluster.
unvotes_joined_percent_plotdf %>%
  ggplot(aes(x = year,
             y = percent,
             group = country,
             label = label)) +
  geom_line(size = 0.1) +
  ggrepel::geom_text_repel(colour = "#3079ff",
                           fontface = "bold",
                           size = 4) +
  facet_wrap(~cluster, labeller = label_both)

# Hand-picked subset of clusters. The ids refer to the fit obtained with the
# seed set above and need revisiting if the seed, K or the input data change.
unvotes_joined_percent_plotdf %>%
  dplyr::filter(cluster %in% c(3, 4, 5, 6, 8, 12)) %>%
  ggplot(aes(x = year,
             y = percent,
             group = country,
             label = label)) +
  geom_line(size = 0.1) +
  ggrepel::geom_text_repel(colour = "#3079ff",
                           fontface = "bold",
                           size = 4) +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(~cluster, labeller = label_both)