library(tidyverse)
library(janitor)
library(skimr)
library(ggpubr)
library(survminer)
library(survival)
theme_set(theme_classic(18) +
theme(legend.position = "bottom"))
raw_data = read_csv("emperors.csv") %>%
janitor::clean_names()
## Parsed with column specification:
## cols(
## index = col_double(),
## name = col_character(),
## name_full = col_character(),
## birth = col_date(format = ""),
## death = col_date(format = ""),
## birth_cty = col_character(),
## birth_prv = col_character(),
## rise = col_character(),
## reign_start = col_date(format = ""),
## reign_end = col_date(format = ""),
## cause = col_character(),
## killer = col_character(),
## dynasty = col_character(),
## era = col_character(),
## notes = col_character(),
## verif_who = col_character()
## )
raw_data %>%
skimr::skim()
## Skim summary statistics
## n obs: 68
## n variables: 16
##
## ── Variable type:character ─────────────────────────────────────────────────────────────────────────────
## variable missing complete n min max empty n_unique
## birth_cty 17 51 68 4 16 0 30
## birth_prv 0 68 68 5 18 0 18
## cause 0 68 68 7 14 0 7
## dynasty 0 68 68 7 14 0 8
## era 0 68 68 8 10 0 2
## killer 0 68 68 4 16 0 14
## name 0 68 68 4 21 0 68
## name_full 0 68 68 26 72 0 67
## notes 22 46 68 22 104 0 23
## rise 0 68 68 8 31 0 8
## verif_who 57 11 68 22 23 0 2
##
## ── Variable type:Date ──────────────────────────────────────────────────────────────────────────────────
## variable missing complete n min max median n_unique
## birth 5 63 68 0002-12-24 0371-01-01 0201-01-01 60
## death 0 68 68 0014-08-19 0395-01-17 0251-08-08 66
## reign_end 0 68 68 0014-08-19 0395-01-17 0251-08-08 65
## reign_start 0 68 68 0014-09-18 0379-01-01 0250-08-08 57
##
## ── Variable type:numeric ───────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75 p100 hist
## index 0 68 68 34.5 19.77 1 17.75 34.5 51.25 68 ▇▇▇▇▇▇▇▇
An overwhelming number of emperors died during their reign. What a high risk job.
table(raw_data$reign_end == raw_data$death)
##
## FALSE TRUE
## 6 62
surv_data = raw_data %>%
dplyr::filter(reign_end == death) %>%
dplyr::mutate(
# surv_time = abs(death - birth) %>% as.integer,
reign_end_num = reign_end %>% stringr::str_sub(2, 4) %>% as.numeric(),
surv_time = (abs(reign_end - reign_start)/365) %>% as.numeric,
surv_dead = case_when(
cause == "Natural Causes" ~ "natural",
cause == "Unknown" ~ "unknown",
TRUE ~ "killed"
)
) %>%
dplyr::filter(
complete.cases(surv_time),
surv_dead != "unknown"
) %>%
dplyr::mutate(
surv_dead_int = (surv_dead == "natural") + 0L,
rise = case_when(
str_detect(rise, "Appointment") ~ "appointment",
rise %in% c("Election", "Birthright", "Purchase") ~ "peaceful",
rise %in% c("Seized Power") ~ "seized power",
) %>% fct_relevel("peaceful"),
killer = case_when(
killer %in% c("Aneurism", "Disease", "Heart Failure", "Fire", "Lightning") ~ "natural",
TRUE ~ "other"
) %>% fct_relevel("natural"),
)
# surv_data$surv_time
# surv_data$surv_dead
surv_data %>%
janitor::tabyl(rise, surv_dead) %>%
adorn_crosstab()
## Warning: 'adorn_crosstab' is deprecated.
## Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
## See help("Deprecated")
## rise killed natural
## 1 peaceful 66.7% (22) 33.3% (11)
## 2 appointment 73.7% (14) 26.3% (5)
## 3 seized power 50.0% (4) 50.0% (4)
# adorn_totals(where = c("row", "col"))
library(ggbeeswarm)
q1 = function(x){quantile(x, 0.25)}
q3 = function(x){quantile(x, 0.75)}
boxplot_reign = surv_data %>%
ggplot(aes(x = surv_dead,
y = surv_time)) +
geom_violin() +
stat_summary(
fun.y = median,
fun.ymin = q1,
fun.ymax = q3,
size = 1.5,
colour = "red") +
geom_beeswarm() +
facet_grid(~rise, labeller = label_both) +
labs(x = "Survival status",
y = "Survival time (years of reign)")
boxplot_reign
ggsave(filename = "boxplot_reign.png", plot = boxplot_reign,
height = 5, width = 8)
fit = survfit(Surv(surv_time, surv_dead_int) ~ rise, data = surv_data)
fit
## Call: survfit(formula = Surv(surv_time, surv_dead_int) ~ rise, data = surv_data)
##
## n events median 0.95LCL 0.95UCL
## rise=peaceful 33 11 20.93 19.04 NA
## rise=appointment 19 5 6.00 1.36 NA
## rise=seized power 8 4 9.51 1.33 NA
surv_plot_rise = ggsurvplot(fit,
data = surv_data,
pval = TRUE,
palette = "Set1")
surv_plot_rise = surv_plot_rise$plot +
labs(x = "Time (years of reign)")
surv_plot_rise
ggsave(filename = "surv_plot_emperor_rise.png", plot = surv_plot_rise,
height = 6, width = 8)
coxph(Surv(surv_time, surv_dead_int) ~ rise + reign_end_num, data = surv_data) %>%
broom::tidy()
## # A tibble: 3 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 riseappointment 2.26e+0 0.772 2.92 0.00351 0.741 3.77
## 2 riseseized power 1.98e+0 0.690 2.87 0.00406 0.630 3.33
## 3 reign_end_num -1.28e-5 0.00253 -0.00508 0.996 -0.00496 0.00494
timeline_data = raw_data %>%
dplyr::mutate(
surv_dead = case_when(
cause == "Natural Causes" ~ "natural",
cause == "Unknown" ~ "unknown",
TRUE ~ "killed"),
surv_dead_int = (surv_dead == "natural") + 0L,
rise = case_when(
str_detect(rise, "Appointment") ~ "appointment",
rise %in% c("Election", "Birthright", "Purchase") ~ "peaceful",
rise %in% c("Seized Power") ~ "seized power",
) %>% fct_relevel("peaceful"),
killer = case_when(
killer %in% c("Aneurism", "Disease", "Heart Failure", "Fire", "Lightning") ~ "natural",
TRUE ~ "other"
) %>% fct_relevel("natural"),
name = as_factor(name) %>% fct_rev(),
reign_start = reign_start %>% stringr::str_sub(2, 4) %>% as.numeric(),
reign_end = reign_end %>% stringr::str_sub(2, 4) %>% as.numeric()
)
timeline_data$reign_start[1] = -timeline_data$reign_start[1]
timeline_surv = timeline_data %>%
ggplot(aes(x = reign_start, y = name,
colour = surv_dead)) +
geom_errorbarh(aes(xmin = reign_start, xmax = reign_end), size = 1.5) +
scale_colour_brewer(palette = "Set1") +
labs(x = "Year", y = "", colour = "Cause of death") +
theme_minimal(18) +
theme(axis.ticks.y = element_blank(),
legend.position = "bottom",
panel.grid.major = element_blank())
timeline_rise = timeline_data %>%
ggplot(aes(x = reign_start, y = name,
colour = rise)) +
geom_errorbarh(aes(xmin = reign_start, xmax = reign_end), size = 1.5) +
ggsci::scale_color_jama() +
labs(x = "Year", y = "", colour = "Rise to power") +
theme_minimal(18) +
theme(axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "bottom",
panel.grid.major = element_blank(),
axis.text.y = element_blank())
timeline_plot = ggpubr::ggarrange(timeline_surv, timeline_rise, nrow = 1)
timeline_plot
ggsave(filename = "timeline_plot.png", plot = timeline_plot,
height = 13, width = 20)
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggbeeswarm_0.6.0 survival_2.44-1.1 survminer_0.4.4
## [4] ggpubr_0.2.1 magrittr_1.5 skimr_1.0.7
## [7] janitor_1.2.0 forcats_0.4.0 stringr_1.4.0
## [10] dplyr_0.8.3 purrr_0.3.2 readr_1.3.1
## [13] tidyr_0.8.3 tibble_2.1.3 ggplot2_3.2.0
## [16] tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 lubridate_1.7.4 lattice_0.20-38
## [4] zoo_1.8-6 utf8_1.1.4 assertthat_0.2.1
## [7] zeallot_0.1.0 digest_0.6.20 plyr_1.8.4
## [10] R6_2.4.0 cellranger_1.1.0 backports_1.1.4
## [13] evaluate_0.14 httr_1.4.0 pillar_1.4.2
## [16] rlang_0.4.0 lazyeval_0.2.2 readxl_1.3.1
## [19] rstudioapi_0.10 data.table_1.12.2 Matrix_1.2-17
## [22] rmarkdown_1.13 labeling_0.3 splines_3.6.0
## [25] munsell_0.5.0 broom_0.5.2 vipor_0.4.5
## [28] compiler_3.6.0 modelr_0.1.4 xfun_0.8
## [31] pkgconfig_2.0.2 htmltools_0.3.6 tidyselect_0.2.5
## [34] gridExtra_2.3 km.ci_0.5-2 fansi_0.4.0
## [37] crayon_1.3.4 withr_2.1.2 grid_3.6.0
## [40] nlme_3.1-140 jsonlite_1.6 xtable_1.8-4
## [43] gtable_0.3.0 KMsurv_0.1-5 scales_1.0.0
## [46] cli_1.1.0 stringi_1.4.3 reshape2_1.4.3
## [49] ggsignif_0.5.0 snakecase_0.11.0 xml2_1.2.0
## [52] ellipsis_0.2.0.1 survMisc_0.5.5 vctrs_0.2.0
## [55] generics_0.0.2 cowplot_1.0.0 ggsci_2.9
## [58] RColorBrewer_1.1-2 tools_3.6.0 cmprsk_2.2-8
## [61] glue_1.3.1 beeswarm_0.2.3 hms_0.5.0
## [64] yaml_2.2.0 colorspace_1.4-1 rvest_0.3.4
## [67] knitr_1.23 haven_2.1.1