In this document, we will visualise the cleaned Test cricket batting innings data using survival curves.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(survminer)
## Loading required package: ggpubr
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
# Load the cleaned Test batting innings and derive the columns used by the
# survival fits further down:
#   runs_num - numeric part of `runs` as an integer (e.g. "165*" -> 165);
#              NA when `runs` contains no digits
#   not_out  - TRUE when `runs` carries a "*" marker
#   dead     - logical negation of `not_out`; serves as the event indicator
clean_test_batting_inngings <- dplyr::mutate(
  readr::read_csv("./clean_test_batting_inngings.csv"),
  runs_num = as.integer(stringr::str_extract(runs, "\\-*\\d+\\.*\\d*")),
  not_out = stringr::str_detect(runs, "[*]"),
  dead = !not_out
)
## Parsed with column specification:
## cols(
## country = col_character(),
## player_name = col_character(),
## runs = col_character(),
## mins = col_double(),
## balls_faced = col_double(),
## fours = col_double(),
## sixes = col_double(),
## strike_rate = col_double(),
## pos = col_double(),
## dismissal = col_character(),
## inns = col_double(),
## opposition = col_character(),
## ground = col_character(),
## start_date = col_character(),
## test_number = col_character()
## )
# Quick structural overview (column names, types, first values) of the data.
dplyr::glimpse(clean_test_batting_inngings)
## Observations: 94,604
## Variables: 18
## $ country <chr> "australia", "australia", "australia", "australia", …
## $ player_name <chr> "C Bannerman", "C Bannerman", "C Bannerman", "C Bann…
## $ runs <chr> "165*", "4", "10", "30", "15", "15*", "17", "6", "5"…
## $ mins <dbl> 285, 8, 55, 13, NA, 10, NA, 16, NA, NA, NA, NA, 1, 4…
## $ balls_faced <dbl> NA, 10, NA, NA, NA, NA, NA, 20, NA, NA, NA, NA, 2, 5…
## $ fours <dbl> 18, 1, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, 0, 2, …
## $ sixes <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, …
## $ strike_rate <dbl> NA, 40.00, NA, NA, NA, NA, NA, 30.00, NA, NA, NA, NA…
## $ pos <dbl> 1, 1, 2, 3, 1, 1, 8, 8, 3, 10, 9, NA, 6, 7, 7, 1, 2,…
## $ dismissal <chr> "retired notout", "bowled", "bowled", "caught", "bow…
## $ inns <dbl> 1, 3, 1, 3, 2, 4, 1, 3, 1, 3, 2, 4, 2, 3, 2, 4, 2, 4…
## $ opposition <chr> "v England", "v England", "v England", "v England", …
## $ ground <chr> "Melbourne", "Melbourne", "Melbourne", "Melbourne", …
## $ start_date <chr> "15 Mar 1877", "15 Mar 1877", "31 Mar 1877", "31 Mar…
## $ test_number <chr> "Test # 1", "Test # 1", "Test # 2", "Test # 2", "Tes…
## $ runs_num <int> 165, 4, 10, 30, 15, 15, 17, 6, 5, 26, 6, NA, 0, 19, …
## $ not_out <lgl> TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE…
## $ dead <lgl> FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TR…
# Keep only the innings of the four named batsmen.
selected_players <- dplyr::filter(
  clean_test_batting_inngings,
  player_name %in% c("MJ Clarke", "SPD Smith", "RT Ponting", "SR Waugh")
)

# Reduce to the three columns the survival model needs and convert to a plain
# data.frame (presumably because survminer is handed this object as `data`).
selected_players_df <- as.data.frame(
  dplyr::transmute(selected_players, player_name, runs_num, dead)
)
library(survival)
# Survival curves of runs scored, one curve per selected player. The "time"
# axis is runs_num and the event is `dead` (innings that did not end not-out).
fit <- survfit(Surv(runs_num, dead) ~ player_name, data = selected_players_df)

# Draw the per-player curves; pval = TRUE asks survminer to annotate the plot
# with a p-value for the comparison between players.
survminer::ggsurvplot(
  fit,
  data = selected_players_df,
  palette = "Set1",
  pval = TRUE
)
# Overall (no grouping) survival curve of balls faced per innings, with
# `dead` as the event indicator.
fit_bf <- survfit(Surv(balls_faced, dead) ~ 1, data = clean_test_batting_inngings)

# Keep only the ggplot component ($plot) of the ggsurvplot object so it can be
# customised further and later combined with the runs plot.
# NOTE: the heading must be passed as `title`; labs() has no `main` argument,
# so the original `main = ...` was silently ignored and no title was drawn.
plot_bf <- survminer::ggsurvplot(
  fit = fit_bf,
  data = clean_test_batting_inngings,
  palette = "Set1",
  surv.median.line = "hv"
)$plot +
  labs(
    x = "Balls faced before getting out",
    title = "Balls faced survival plot"
  ) +
  # Replaces the x scale ggsurvplot already added (ggplot2 reports this with a
  # "Scale for 'x' is already present" message) to get custom tick positions.
  scale_x_continuous(breaks = c(30, 100, 200, 400, 600, 800))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
# Overall (no grouping) survival curve of runs scored per innings, with
# `dead` as the event indicator.
fit_runs <- survfit(Surv(runs_num, dead) ~ 1, data = clean_test_batting_inngings)

# Keep only the ggplot component ($plot) of the ggsurvplot object so it can be
# customised further and later combined with the balls-faced plot.
# NOTE: the heading must be passed as `title`; labs() has no `main` argument,
# so the original `main = ...` was silently ignored and no title was drawn.
plot_runs <- survminer::ggsurvplot(
  fit = fit_runs,
  data = clean_test_batting_inngings,
  palette = "Set1",
  surv.median.line = "hv"
)$plot +
  labs(
    x = "Runs scored before getting out",
    title = "Runs scored survival plot"
  ) +
  # Replaces the x scale ggsurvplot already added (ggplot2 reports this with a
  # "Scale for 'x' is already present" message) to get custom tick positions.
  scale_x_continuous(breaks = c(30, 100, 200, 400, 600, 800))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
# Show the balls-faced and runs-scored survival plots together in one figure.
ggpubr::ggarrange(plotlist = list(plot_bf, plot_runs))
# Record the R version and package versions used, for reproducibility.
utils::sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] survival_2.44-1.1 survminer_0.4.4 ggpubr_0.2.1
## [4] magrittr_1.5 forcats_0.4.0 stringr_1.4.0
## [7] dplyr_0.8.3 purrr_0.3.2 readr_1.3.1
## [10] tidyr_0.8.3 tibble_2.1.3 ggplot2_3.2.0
## [13] tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] zoo_1.8-6 tidyselect_0.2.5 xfun_0.8
## [4] splines_3.6.0 haven_2.1.1 lattice_0.20-38
## [7] colorspace_1.4-1 generics_0.0.2 vctrs_0.2.0
## [10] htmltools_0.3.6 yaml_2.2.0 utf8_1.1.4
## [13] survMisc_0.5.5 rlang_0.4.0 pillar_1.4.2
## [16] glue_1.3.1 withr_2.1.2 RColorBrewer_1.1-2
## [19] modelr_0.1.4 readxl_1.3.1 munsell_0.5.0
## [22] ggsignif_0.5.0 gtable_0.3.0 cellranger_1.1.0
## [25] rvest_0.3.4 evaluate_0.14 labeling_0.3
## [28] knitr_1.23 fansi_0.4.0 broom_0.5.2
## [31] Rcpp_1.0.1 xtable_1.8-4 scales_1.0.0
## [34] backports_1.1.4 cmprsk_2.2-8 jsonlite_1.6
## [37] km.ci_0.5-2 gridExtra_2.3 hms_0.5.0
## [40] digest_0.6.20 stringi_1.4.3 KMsurv_0.1-5
## [43] cowplot_1.0.0 grid_3.6.0 cli_1.1.0
## [46] tools_3.6.0 lazyeval_0.2.2 crayon_1.3.4
## [49] pkgconfig_2.0.2 zeallot_0.1.0 Matrix_1.2-17
## [52] data.table_1.12.2 xml2_1.2.0 lubridate_1.7.4
## [55] assertthat_0.2.1 rmarkdown_1.13 httr_1.4.0
## [58] rstudioapi_0.10 R6_2.4.0 nlme_3.1-140
## [61] compiler_3.6.0