1 Loading the packages

library(tidyverse)
library(janitor)
library(skimr)
library(ggpubr)
library(survminer)
library(survival)


theme_set(theme_classic(18) +
            theme(legend.position = "bottom"))

2 Load data

raw_data = read_csv("emperors.csv") %>% 
  janitor::clean_names()
## Parsed with column specification:
## cols(
##   index = col_double(),
##   name = col_character(),
##   name_full = col_character(),
##   birth = col_date(format = ""),
##   death = col_date(format = ""),
##   birth_cty = col_character(),
##   birth_prv = col_character(),
##   rise = col_character(),
##   reign_start = col_date(format = ""),
##   reign_end = col_date(format = ""),
##   cause = col_character(),
##   killer = col_character(),
##   dynasty = col_character(),
##   era = col_character(),
##   notes = col_character(),
##   verif_who = col_character()
## )
raw_data %>% 
  skimr::skim()
## Skim summary statistics
##  n obs: 68 
##  n variables: 16 
## 
## ── Variable type:character ─────────────────────────────────────────────────────────────────────────────
##   variable missing complete  n min max empty n_unique
##  birth_cty      17       51 68   4  16     0       30
##  birth_prv       0       68 68   5  18     0       18
##      cause       0       68 68   7  14     0        7
##    dynasty       0       68 68   7  14     0        8
##        era       0       68 68   8  10     0        2
##     killer       0       68 68   4  16     0       14
##       name       0       68 68   4  21     0       68
##  name_full       0       68 68  26  72     0       67
##      notes      22       46 68  22 104     0       23
##       rise       0       68 68   8  31     0        8
##  verif_who      57       11 68  22  23     0        2
## 
## ── Variable type:Date ──────────────────────────────────────────────────────────────────────────────────
##     variable missing complete  n        min        max     median n_unique
##        birth       5       63 68 0002-12-24 0371-01-01 0201-01-01       60
##        death       0       68 68 0014-08-19 0395-01-17 0251-08-08       66
##    reign_end       0       68 68 0014-08-19 0395-01-17 0251-08-08       65
##  reign_start       0       68 68 0014-09-18 0379-01-01 0250-08-08       57
## 
## ── Variable type:numeric ───────────────────────────────────────────────────────────────────────────────
##  variable missing complete  n mean    sd p0   p25  p50   p75 p100     hist
##     index       0       68 68 34.5 19.77  1 17.75 34.5 51.25   68 ▇▇▇▇▇▇▇▇

3 How many emperors die during reign

An overwhelming number of emperors died during their reign. What a high risk job.

table(raw_data$reign_end == raw_data$death)
## 
## FALSE  TRUE 
##     6    62

4 Cleaning up the data for survival analysis

surv_data = raw_data %>% 
  dplyr::filter(reign_end == death) %>% 
  dplyr::mutate(
    # surv_time = abs(death - birth) %>% as.integer,
    reign_end_num = reign_end %>% stringr::str_sub(2, 4) %>% as.numeric(),
    surv_time = (abs(reign_end - reign_start)/365) %>% as.numeric,
    surv_dead = case_when(
      cause == "Natural Causes" ~ "natural", 
      cause == "Unknown" ~ "unknown",
      TRUE ~ "killed"
    )
  ) %>% 
  dplyr::filter(
    complete.cases(surv_time), 
    surv_dead != "unknown"
  ) %>% 
  dplyr::mutate(
    surv_dead_int = (surv_dead == "natural") + 0L,
    rise = case_when(
      str_detect(rise, "Appointment") ~ "appointment",
      rise %in% c("Election", "Birthright", "Purchase") ~ "peaceful", 
      rise %in% c("Seized Power") ~ "seized power", 
    ) %>% fct_relevel("peaceful"),
    killer = case_when(
      killer %in% c("Aneurism", "Disease", "Heart Failure", "Fire", "Lightning") ~ "natural", 
      TRUE ~ "other"
    ) %>% fct_relevel("natural"),
  )

# surv_data$surv_time
# surv_data$surv_dead

surv_data %>% 
  janitor::tabyl(rise, surv_dead) %>% 
  adorn_crosstab()
## Warning: 'adorn_crosstab' is deprecated.
## Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
## See help("Deprecated")
##           rise     killed    natural
## 1     peaceful 66.7% (22) 33.3% (11)
## 2  appointment 73.7% (14) 26.3%  (5)
## 3 seized power 50.0%  (4) 50.0%  (4)
# adorn_totals(where = c("row", "col")) 

library(ggbeeswarm)
q1 = function(x){quantile(x, 0.25)}
q3 = function(x){quantile(x, 0.75)}

boxplot_reign = surv_data %>% 
  ggplot(aes(x = surv_dead, 
             y = surv_time)) +
  geom_violin() +
  stat_summary(
    fun.y = median, 
    fun.ymin = q1,
    fun.ymax = q3,
    size = 1.5,
    colour = "red") +
  geom_beeswarm() +
  facet_grid(~rise, labeller = label_both) +
  labs(x = "Survival status", 
       y = "Survival time (years of reign)")

boxplot_reign

ggsave(filename = "boxplot_reign.png", plot = boxplot_reign,
       height = 5, width = 8)

4.1 Survival plot

fit = survfit(Surv(surv_time, surv_dead_int) ~ rise, data = surv_data)

fit
## Call: survfit(formula = Surv(surv_time, surv_dead_int) ~ rise, data = surv_data)
## 
##                    n events median 0.95LCL 0.95UCL
## rise=peaceful     33     11  20.93   19.04      NA
## rise=appointment  19      5   6.00    1.36      NA
## rise=seized power  8      4   9.51    1.33      NA
surv_plot_rise = ggsurvplot(fit, 
                            data = surv_data, 
                            pval = TRUE, 
                            palette = "Set1")

surv_plot_rise = surv_plot_rise$plot +
  labs(x = "Time (years of reign)")

surv_plot_rise

ggsave(filename = "surv_plot_emperor_rise.png", plot = surv_plot_rise,
       height = 6, width = 8)

4.2 Cox regression

coxph(Surv(surv_time, surv_dead_int) ~ rise + reign_end_num, data = surv_data) %>% 
  broom::tidy()
## # A tibble: 3 x 7
##   term              estimate std.error statistic p.value conf.low conf.high
##   <chr>                <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
## 1 riseappointment    2.26e+0   0.772     2.92    0.00351  0.741     3.77   
## 2 riseseized power   1.98e+0   0.690     2.87    0.00406  0.630     3.33   
## 3 reign_end_num     -1.28e-5   0.00253  -0.00508 0.996   -0.00496   0.00494

5 Timeline plot

timeline_data = raw_data %>% 
  dplyr::mutate(
    surv_dead = case_when(
      cause == "Natural Causes" ~ "natural", 
      cause == "Unknown" ~ "unknown",
      TRUE ~ "killed"), 
    surv_dead_int = (surv_dead == "natural") + 0L,
    rise = case_when(
      str_detect(rise, "Appointment") ~ "appointment",
      rise %in% c("Election", "Birthright", "Purchase") ~ "peaceful", 
      rise %in% c("Seized Power") ~ "seized power", 
    ) %>% fct_relevel("peaceful"),
    killer = case_when(
      killer %in% c("Aneurism", "Disease", "Heart Failure", "Fire", "Lightning") ~ "natural", 
      TRUE ~ "other"
    ) %>% fct_relevel("natural"),
    name = as_factor(name) %>% fct_rev(),
    reign_start = reign_start %>% stringr::str_sub(2, 4) %>% as.numeric(),
    reign_end = reign_end %>% stringr::str_sub(2, 4) %>% as.numeric()
    )

timeline_data$reign_start[1] = -timeline_data$reign_start[1]




timeline_surv = timeline_data %>%
  ggplot(aes(x = reign_start, y = name, 
             colour = surv_dead)) +
  geom_errorbarh(aes(xmin = reign_start, xmax = reign_end), size = 1.5) +
  scale_colour_brewer(palette = "Set1") +
  labs(x = "Year", y = "", colour = "Cause of death") +
  theme_minimal(18) +
  theme(axis.ticks.y = element_blank(),
legend.position = "bottom",
        panel.grid.major = element_blank())


timeline_rise = timeline_data %>% 
  ggplot(aes(x = reign_start, y = name, 
             colour = rise)) +
  geom_errorbarh(aes(xmin = reign_start, xmax = reign_end), size = 1.5) +
  ggsci::scale_color_jama() +
  labs(x = "Year", y = "", colour = "Rise to power") +
  theme_minimal(18) +
  theme(axis.ticks.y = element_blank(), 
        axis.line.y = element_blank(),
        legend.position = "bottom",
        panel.grid.major = element_blank(),
        axis.text.y = element_blank())


timeline_plot = ggpubr::ggarrange(timeline_surv, timeline_rise, nrow = 1)

timeline_plot

ggsave(filename = "timeline_plot.png", plot = timeline_plot,
       height = 13, width = 20)

6 Session Info

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggbeeswarm_0.6.0  survival_2.44-1.1 survminer_0.4.4  
##  [4] ggpubr_0.2.1      magrittr_1.5      skimr_1.0.7      
##  [7] janitor_1.2.0     forcats_0.4.0     stringr_1.4.0    
## [10] dplyr_0.8.3       purrr_0.3.2       readr_1.3.1      
## [13] tidyr_0.8.3       tibble_2.1.3      ggplot2_3.2.0    
## [16] tidyverse_1.2.1  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1         lubridate_1.7.4    lattice_0.20-38   
##  [4] zoo_1.8-6          utf8_1.1.4         assertthat_0.2.1  
##  [7] zeallot_0.1.0      digest_0.6.20      plyr_1.8.4        
## [10] R6_2.4.0           cellranger_1.1.0   backports_1.1.4   
## [13] evaluate_0.14      httr_1.4.0         pillar_1.4.2      
## [16] rlang_0.4.0        lazyeval_0.2.2     readxl_1.3.1      
## [19] rstudioapi_0.10    data.table_1.12.2  Matrix_1.2-17     
## [22] rmarkdown_1.13     labeling_0.3       splines_3.6.0     
## [25] munsell_0.5.0      broom_0.5.2        vipor_0.4.5       
## [28] compiler_3.6.0     modelr_0.1.4       xfun_0.8          
## [31] pkgconfig_2.0.2    htmltools_0.3.6    tidyselect_0.2.5  
## [34] gridExtra_2.3      km.ci_0.5-2        fansi_0.4.0       
## [37] crayon_1.3.4       withr_2.1.2        grid_3.6.0        
## [40] nlme_3.1-140       jsonlite_1.6       xtable_1.8-4      
## [43] gtable_0.3.0       KMsurv_0.1-5       scales_1.0.0      
## [46] cli_1.1.0          stringi_1.4.3      reshape2_1.4.3    
## [49] ggsignif_0.5.0     snakecase_0.11.0   xml2_1.2.0        
## [52] ellipsis_0.2.0.1   survMisc_0.5.5     vctrs_0.2.0       
## [55] generics_0.0.2     cowplot_1.0.0      ggsci_2.9         
## [58] RColorBrewer_1.1-2 tools_3.6.0        cmprsk_2.2-8      
## [61] glue_1.3.1         beeswarm_0.2.3     hms_0.5.0         
## [64] yaml_2.2.0         colorspace_1.4-1   rvest_0.3.4       
## [67] knitr_1.23         haven_2.1.1