Loading packages
library(tidyverse)
library(rvest)
library(xml2)
library(lubridate)
library(gganimate)
library(ggmap)
library(plotly)
library(glue)
Loading data
# Download the NSW Health page that lists flights with confirmed cases.
url <- "https://www.health.nsw.gov.au/Infectious/diseases/Pages/coronavirus-flights.aspx"
raw <- xml2::read_html(url)

# Extract the table nested in the page's rich-HTML content wrapper, convert it
# to a tibble and clean its column names.
# XPath approach adapted from:
# https://stackoverflow.com/questions/52855989/scrape-aspx-page-with-r
raw_flights_tbl <- raw %>%
  rvest::html_node(
    xpath = ".//div[@id='ctl00_PlaceHolderMain_contentc1__ControlWrapper_RichHtmlField']/table"
  ) %>%
  rvest::html_table() %>%
  as_tibble() %>%
  janitor::clean_names()
Clean data
# Tidy the scraped table:
#   * parse the day-month-year date columns into date-times,
#   * split the combined "origin/destination" column into two columns,
#   * trim whitespace around the split values so that e.g. "Sydney " and
#     "Sydney" are treated as the same location in later joins and tallies,
#   * add a unique row id, because flight numbers repeat across rows.
flights_tbl <- raw_flights_tbl %>%
  dplyr::mutate(
    date_of_departure = as_datetime(dmy(date_of_departure)),
    date_of_arrival = as_datetime(dmy(date_of_arrival))
  ) %>%
  tidyr::separate(
    col = origin_destination,
    into = c("origin", "destination"),
    sep = "/"
  ) %>%
  dplyr::mutate(
    origin = trimws(origin),
    destination = trimws(destination)
  ) %>%
  tibble::rowid_to_column("plane_id") # Avoid repeated flight_number
Location query through Google Maps (API key needed)
# Geocode every distinct origin/destination, caching results on disk so that
# repeated runs do not re-query the Google Maps API for known locations.
geocode_cache_path <- "data/all_geocode.rds"
all_locations <- unique(c(flights_tbl$origin, flights_tbl$destination))

# Start from the cached results when present, otherwise from an empty table
# with the same two columns (location name, list-column of geocode results).
cached_geocode <- if (file.exists(geocode_cache_path)) {
  readRDS(geocode_cache_path)
} else {
  tibble(location = character(), geocode = list())
}

# Only locations absent from the cache are sent to the API.
new_locations <- setdiff(all_locations, cached_geocode$location)

if (length(new_locations) > 0) {
  new_geocode <- tibble(
    location = new_locations,
    geocode = purrr::map(location, ggmap::geocode)
  )
  all_geocode <- dplyr::bind_rows(cached_geocode, new_geocode)

  # saveRDS() fails when the target directory is missing, so create it first.
  dir.create(dirname(geocode_cache_path), showWarnings = FALSE, recursive = TRUE)
  saveRDS(all_geocode, file = geocode_cache_path)
} else {
  all_geocode <- cached_geocode
}
# Attach coordinates to each flight and derive the fields used for plotting.
flights_location_tbl <- flights_tbl %>%
  # The first join adds a `geocode` list-column for the origin; the second join
  # adds one for the destination, and `suffix` renames the pair to
  # `geocode_origin` / `geocode_destination`.
  left_join(all_geocode, by = c("origin" = "location")) %>%
  left_join(
    all_geocode,
    by = c("destination" = "location"),
    suffix = c("_origin", "_destination")
  ) %>%
  # Expand both list-columns; names_sep = "_" prefixes the inner column names
  # with the outer ones (e.g. geocode_origin_lon).
  unnest(c(geocode_origin, geocode_destination), names_sep = "_") %>%
  # Per route: list the distinct flight numbers and build the hover label.
  group_by(origin, destination) %>%
  dplyr::mutate(
    collapse_flights = paste(unique(flight), collapse = ", "),
    hover = glue("Origin: {origin} \n Dest: {destination} \n Flight: {collapse_flights}")
  ) %>%
  # Per origin: number of rows departing from it.
  group_by(origin) %>%
  dplyr::mutate(origin_tally = n()) %>%
  ungroup() %>%
  # Store the departure date as text.
  dplyr::mutate(date_of_departure = as.character(date_of_departure))
# all_location_tbl = flights_tbl %>%
# dplyr::select(origin, destination) %>%
# group_by(origin, destination) %>%
# dplyr::mutate(count = n()) %>%
# pivot_longer(cols = c("origin", "destination"),
# names_to = "location_type",
# values_to = "location_value") %>%
# left_join(all_geocode, by = c("location_value" = "location")) %>%
# unnest(geocode)
Plotly visualisations
# Map options passed to layout(geo = ...): world scope, azimuthal equal-area
# projection, and light grey land and country colours.
geo <- list(
  scope = "world",
  projection = list(type = "azimuthal equal area"),
  showland = TRUE,
  landcolor = toRGB("gray95"),
  countrycolor = toRGB("gray80")
)
# Base geographic plot; all traces are drawn in red unless overridden.
# The figure height is set here rather than in layout(), because plotly warns
# that passing width/height to layout() is deprecated.
fig <- plot_geo(locationmode = "ISO-3", color = I("red"), height = 800)

# Disabled earlier variant, kept for reference. It relies on
# `all_location_tbl`, which is itself commented out elsewhere in this file.
# fig <- fig %>% add_markers(
# data = all_location_tbl, x = ~lon, y = ~lat,
# # text = ~location_value,
# hoverinfo = "none",
# size = ~count+2,
# alpha = 0.9)
#
# fig <- fig %>% add_segments(
# data = flights_location_tbl,
# x = ~geocode_origin_lon, xend = ~geocode_destination_lon,
# y = ~geocode_origin_lat, yend = ~geocode_destination_lat,
# text = ~hover,
# hoverinfo = "text",
# alpha = 0.3,
# size = I(3)
# )

fig <- fig %>%
  # Origin markers, sized by the number of rows departing from each origin.
  add_markers(
    data = flights_location_tbl,
    x = ~geocode_origin_lon, y = ~geocode_origin_lat,
    # text = ~location_value,
    size = ~origin_tally,
    hoverinfo = "none",
    alpha = 0.5
  ) %>%
  # Destination markers.
  add_markers(
    data = flights_location_tbl,
    x = ~geocode_destination_lon, y = ~geocode_destination_lat,
    # text = ~location_value,
    hoverinfo = "none",
    alpha = 0.9
  ) %>%
  # Origin name labels.
  add_text(
    data = flights_location_tbl,
    x = ~geocode_origin_lon, y = ~geocode_origin_lat,
    text = ~origin,
    color = I("black"),
    textposition = "top right"
  ) %>%
  # One segment per flight from origin to destination, animated by departure
  # date, with the route summary shown on hover.
  add_segments(
    data = flights_location_tbl,
    x = ~geocode_origin_lon, xend = ~geocode_destination_lon,
    y = ~geocode_origin_lat, yend = ~geocode_destination_lat,
    frame = ~date_of_departure,
    text = ~hover,
    hoverinfo = "text",
    alpha = 0.3,
    size = I(3)
  )

fig <- fig %>% layout(
  title = "Flights with confirmed cases (source: NSW Health)",
  geo = geo,
  showlegend = FALSE
)

fig
Session info
# Print the R version, platform settings and package versions used for this run.
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.2 (2019-12-12)
## os macOS Mojave 10.14.6
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_AU.UTF-8
## ctype en_AU.UTF-8
## tz Australia/Sydney
## date 2020-03-27
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
## backports 1.1.5 2019-10-02 [1] CRAN (R 3.6.0)
## bitops 1.0-6 2013-08-17 [1] CRAN (R 3.6.0)
## broom 0.5.5 2020-02-29 [1] CRAN (R 3.6.0)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.6.0)
## cli 2.0.2 2020-02-28 [1] CRAN (R 3.6.0)
## colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.6.0)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
## crosstalk 1.0.0 2016-12-21 [1] CRAN (R 3.6.0)
## curl 4.3 2019-12-02 [1] CRAN (R 3.6.0)
## data.table 1.12.8 2019-12-09 [1] CRAN (R 3.6.0)
## DBI 1.1.0 2019-12-15 [1] CRAN (R 3.6.0)
## dbplyr 1.4.2 2019-06-17 [1] CRAN (R 3.6.0)
## digest 0.6.25 2020-02-23 [1] CRAN (R 3.6.0)
## dplyr * 0.8.5 2020-03-07 [1] CRAN (R 3.6.0)
## ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.1)
## evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 3.6.0)
## farver 2.0.3 2020-01-16 [1] CRAN (R 3.6.0)
## fastmap 1.0.1 2019-10-08 [1] CRAN (R 3.6.0)
## forcats * 0.5.0 2020-03-01 [1] CRAN (R 3.6.2)
## fs 1.3.2 2020-03-05 [1] CRAN (R 3.6.2)
## generics 0.0.2 2018-11-29 [1] CRAN (R 3.6.0)
## gganimate * 1.0.5 2020-02-09 [1] CRAN (R 3.6.0)
## ggmap * 3.0.0 2019-02-05 [1] CRAN (R 3.6.0)
## ggplot2 * 3.3.0 2020-03-05 [1] CRAN (R 3.6.2)
## gifski 0.8.6 2018-09-28 [1] CRAN (R 3.6.0)
## glue * 1.3.1 2019-03-12 [1] CRAN (R 3.6.0)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.0)
## haven 2.2.0 2019-11-08 [1] CRAN (R 3.6.0)
## hms 0.5.3 2020-01-08 [1] CRAN (R 3.6.0)
## htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0)
## htmlwidgets 1.5.1 2019-10-08 [1] CRAN (R 3.6.0)
## httpuv 1.5.2 2019-09-11 [1] CRAN (R 3.6.1)
## httr 1.4.1 2019-08-05 [1] CRAN (R 3.6.0)
## janitor 1.2.1 2020-01-22 [1] CRAN (R 3.6.0)
## jpeg 0.1-8.1 2019-10-24 [1] CRAN (R 3.6.0)
## jsonlite 1.6.1 2020-02-02 [1] CRAN (R 3.6.0)
## knitr 1.28 2020-02-06 [1] CRAN (R 3.6.0)
## later 1.0.0 2019-10-04 [1] CRAN (R 3.6.0)
## lattice 0.20-40 2020-02-19 [1] CRAN (R 3.6.0)
## lazyeval 0.2.2 2019-03-15 [1] CRAN (R 3.6.0)
## lifecycle 0.2.0 2020-03-06 [1] CRAN (R 3.6.0)
## lubridate * 1.7.4 2018-04-11 [1] CRAN (R 3.6.0)
## magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
## mime 0.9 2020-02-04 [1] CRAN (R 3.6.0)
## modelr 0.1.6 2020-02-22 [1] CRAN (R 3.6.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.0)
## nlme 3.1-145 2020-03-04 [1] CRAN (R 3.6.2)
## pillar 1.4.3 2019-12-20 [1] CRAN (R 3.6.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0)
## plotly * 4.9.2 2020-02-12 [1] CRAN (R 3.6.0)
## plyr 1.8.6 2020-03-03 [1] CRAN (R 3.6.2)
## png 0.1-7 2013-12-03 [1] CRAN (R 3.6.0)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 3.6.0)
## progress 1.2.2 2019-05-16 [1] CRAN (R 3.6.0)
## promises 1.1.0 2019-10-04 [1] CRAN (R 3.6.0)
## purrr * 0.3.3 2019-10-18 [1] CRAN (R 3.6.0)
## R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.0)
## Rcpp 1.0.3 2019-11-08 [1] CRAN (R 3.6.0)
## readr * 1.3.1 2018-12-21 [1] CRAN (R 3.6.0)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 3.6.0)
## reprex 0.3.0 2019-05-16 [1] CRAN (R 3.6.0)
## RgoogleMaps 1.4.5.3 2020-02-12 [1] CRAN (R 3.6.0)
## rjson 0.2.20 2018-06-08 [1] CRAN (R 3.6.0)
## rlang 0.4.5 2020-03-01 [1] CRAN (R 3.6.2)
## rmarkdown 2.1 2020-01-20 [1] CRAN (R 3.6.0)
## rstudioapi 0.11 2020-02-07 [1] CRAN (R 3.6.0)
## rvest * 0.3.5 2019-11-08 [1] CRAN (R 3.6.0)
## scales 1.1.0 2019-11-18 [1] CRAN (R 3.6.0)
## selectr 0.4-2 2019-11-20 [1] CRAN (R 3.6.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
## shiny 1.4.0 2019-10-10 [1] CRAN (R 3.6.0)
## snakecase 0.11.0 2019-05-25 [1] CRAN (R 3.6.0)
## sp 1.4-1 2020-02-28 [1] CRAN (R 3.6.0)
## stringi 1.4.6 2020-02-17 [1] CRAN (R 3.6.1)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
## tibble * 2.1.3 2019-06-06 [1] CRAN (R 3.6.0)
## tidyr * 1.0.2 2020-01-24 [1] CRAN (R 3.6.0)
## tidyselect 1.0.0 2020-01-27 [1] CRAN (R 3.6.0)
## tidyverse * 1.3.0 2019-11-21 [1] CRAN (R 3.6.0)
## tweenr 1.0.1 2018-12-14 [1] CRAN (R 3.6.0)
## vctrs 0.2.4 2020-03-10 [1] CRAN (R 3.6.2)
## viridisLite 0.3.0 2018-02-01 [1] CRAN (R 3.6.0)
## withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0)
## xfun 0.12 2020-01-13 [1] CRAN (R 3.6.0)
## xml2 * 1.2.4 2020-03-10 [1] CRAN (R 3.6.2)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 3.6.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 3.6.1)
##
## [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library