1 Loading packages

library(tidyverse)
library(rvest)
library(xml2)
library(lubridate)
library(gganimate)
library(ggmap)
library(plotly)
library(glue)

2 Loading data

# Page to scrape (NSW Health, "coronavirus-flights").
url <- "https://www.health.nsw.gov.au/Infectious/diseases/Pages/coronavirus-flights.aspx"
raw <- xml2::read_html(url)

# Pull the <table> that sits directly under the page's rich-HTML content
# wrapper <div>, then convert it step by step into a tibble with
# snake_case column names.
# XPath approach taken from:
# https://stackoverflow.com/questions/52855989/scrape-aspx-page-with-r
raw_flights_tbl <- rvest::html_node(
  raw,
  xpath = ".//div[@id='ctl00_PlaceHolderMain_contentc1__ControlWrapper_RichHtmlField']/table"
)
raw_flights_tbl <- rvest::html_table(raw_flights_tbl)
raw_flights_tbl <- as_tibble(raw_flights_tbl)
raw_flights_tbl <- janitor::clean_names(raw_flights_tbl)

3 Clean data

# Parse the day-month-year date columns and promote them to date-times.
flights_tbl <- dplyr::mutate(
  raw_flights_tbl,
  date_of_departure = as_datetime(dmy(date_of_departure)),
  date_of_arrival = as_datetime(dmy(date_of_arrival))
)

# Split the combined route column on "/" into its two endpoints.
flights_tbl <- tidyr::separate(
  flights_tbl,
  col = origin_destination,
  into = c("origin", "destination"),
  sep = "/"
)

# Add a row-number key: flight numbers can repeat, so they cannot serve as
# a unique identifier.
flights_tbl <- tibble::rowid_to_column(flights_tbl, "plane_id")

4 Location query through Google Maps (Google API key required)

# Geocode every distinct origin/destination, using an on-disk cache so the
# (key-protected, rate-limited) Google geocoding API is only queried for
# locations that have not been looked up before.
geocode_cache_path <- "data/all_geocode.rds"

all_locations <- unique(c(flights_tbl$origin, flights_tbl$destination))

# Start from the cached lookups when available, otherwise from an empty table
# with the same two columns (`location`, list-column `geocode`).
if (file.exists(geocode_cache_path)) {
  all_geocode <- readRDS(geocode_cache_path)
} else {
  all_geocode <- tibble(location = character(), geocode = list())
}

# Only query locations missing from the cache.
# NOTE: cached results are reused as-is; delete the cache file to force a
# fresh lookup of every location.
new_locations <- setdiff(all_locations, all_geocode$location)

if (length(new_locations) > 0) {
  new_geocode <- tibble(
    location = new_locations,
    geocode = purrr::map(location, ggmap::geocode)
  )
  all_geocode <- dplyr::bind_rows(all_geocode, new_geocode)

  # Make sure the cache directory exists before writing.
  dir.create(dirname(geocode_cache_path), showWarnings = FALSE, recursive = TRUE)
  saveRDS(all_geocode, file = geocode_cache_path)
}

# Attach coordinates for both endpoints of each flight and derive the
# plotting helpers:
#   * geocode_origin_* / geocode_destination_* : unnested geocode columns
#   * collapse_flights : distinct flight numbers per origin/destination pair
#   * hover            : hover text shown on each route segment
#   * origin_tally     : number of rows sharing the same origin
flights_location_tbl <- flights_tbl %>%
  left_join(all_geocode, by = c("origin" = "location")) %>%
  # Both sides now carry a `geocode` column, so the suffixes disambiguate them.
  left_join(all_geocode, by = c("destination" = "location"),
            suffix = c("_origin", "_destination")) %>%
  unnest(c(geocode_origin, geocode_destination), names_sep = "_") %>%
  group_by(origin, destination) %>%
  dplyr::mutate(
    collapse_flights = paste(unique(flight), collapse = ", "),
    hover = glue("Origin: {origin} \n Dest: {destination} \n Flight: {collapse_flights}")) %>%
  group_by(origin) %>%
  dplyr::mutate(origin_tally = n()) %>%
  ungroup() %>%
  # The departure date is later used as the plotly animation frame; store it
  # as text.
  dplyr::mutate(date_of_departure = as.character(date_of_departure))

# all_location_tbl = flights_tbl %>% 
#   dplyr::select(origin, destination) %>% 
#   group_by(origin, destination) %>% 
#   dplyr::mutate(count = n()) %>% 
#   pivot_longer(cols = c("origin", "destination"),
#                names_to = "location_type",
#                values_to = "location_value") %>% 
#   left_join(all_geocode, by = c("location_value" = "location")) %>% 
#   unnest(geocode)

5 Plotly visualisations

# Map styling passed to plotly's `layout(geo = ...)`.
geo <- list(
  scope = "world",
  projection = list(type = "azimuthal equal area"),
  # Draw land masses, in light grey, with slightly darker country borders.
  showland = TRUE,
  landcolor = plotly::toRGB("gray95"),
  countrycolor = plotly::toRGB("gray80")
)

# Base geo figure; every trace defaults to red.
# The height is set here (forwarded to plot_ly()) rather than in layout(),
# because plotly deprecates width/height in layout() and warns about it.
fig <- plot_geo(locationmode = "ISO-3", color = I("red"), height = 800)

# Disabled alternative: static markers sized by route count plus
# non-animated segments (needs the commented-out `all_location_tbl`).
# fig <- fig %>% add_markers(
#   data = all_location_tbl, x = ~lon, y = ~lat, 
#   # text = ~location_value, 
#   hoverinfo = "none",
#   size = ~count+2,
#   alpha = 0.9)
# 
# fig <- fig %>% add_segments(
#     data = flights_location_tbl,
#     x = ~geocode_origin_lon, xend = ~geocode_destination_lon,
#     y = ~geocode_origin_lat, yend = ~geocode_destination_lat,
#     text = ~hover,
#     hoverinfo = "text",
#     alpha = 0.3, 
#     size = I(3)
#     )

fig <- fig %>%
  # Origin markers, sized by `origin_tally`.
  add_markers(
    data = flights_location_tbl,
    x = ~geocode_origin_lon, y = ~geocode_origin_lat,
    # text = ~location_value,
    size = ~origin_tally,
    hoverinfo = "none",
    alpha = 0.5) %>%
  # Destination markers.
  add_markers(
    data = flights_location_tbl,
    x = ~geocode_destination_lon, y = ~geocode_destination_lat,
    # text = ~location_value,
    hoverinfo = "none",
    alpha = 0.9) %>%
  # Origin name labels.
  add_text(
    data = flights_location_tbl,
    x = ~geocode_origin_lon, y = ~geocode_origin_lat,
    text = ~origin,
    color = I("black"),
    textposition = "top right") %>%
  # Route segments; the only trace with a `frame`, so the animation steps
  # through departure dates while the markers and labels stay fixed.
  add_segments(
    data = flights_location_tbl,
    x = ~geocode_origin_lon, xend = ~geocode_destination_lon,
    y = ~geocode_origin_lat, yend = ~geocode_destination_lat,
    frame = ~date_of_departure,
    text = ~hover,
    hoverinfo = "text",
    alpha = 0.3,
    size = I(3))


fig <- fig %>% layout(
  title = "Flights with confirmed cases (source: NSW Health)",
  geo = geo, showlegend = FALSE
)

fig

6 Session info

# Print the R version, platform details and package versions used for this
# run, for reproducibility.
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.2 (2019-12-12)
##  os       macOS Mojave 10.14.6        
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_AU.UTF-8                 
##  ctype    en_AU.UTF-8                 
##  tz       Australia/Sydney            
##  date     2020-03-27                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version date       lib source        
##  assertthat    0.2.1   2019-03-21 [1] CRAN (R 3.6.0)
##  backports     1.1.5   2019-10-02 [1] CRAN (R 3.6.0)
##  bitops        1.0-6   2013-08-17 [1] CRAN (R 3.6.0)
##  broom         0.5.5   2020-02-29 [1] CRAN (R 3.6.0)
##  cellranger    1.1.0   2016-07-27 [1] CRAN (R 3.6.0)
##  cli           2.0.2   2020-02-28 [1] CRAN (R 3.6.0)
##  colorspace    1.4-1   2019-03-18 [1] CRAN (R 3.6.0)
##  crayon        1.3.4   2017-09-16 [1] CRAN (R 3.6.0)
##  crosstalk     1.0.0   2016-12-21 [1] CRAN (R 3.6.0)
##  curl          4.3     2019-12-02 [1] CRAN (R 3.6.0)
##  data.table    1.12.8  2019-12-09 [1] CRAN (R 3.6.0)
##  DBI           1.1.0   2019-12-15 [1] CRAN (R 3.6.0)
##  dbplyr        1.4.2   2019-06-17 [1] CRAN (R 3.6.0)
##  digest        0.6.25  2020-02-23 [1] CRAN (R 3.6.0)
##  dplyr       * 0.8.5   2020-03-07 [1] CRAN (R 3.6.0)
##  ellipsis      0.3.0   2019-09-20 [1] CRAN (R 3.6.1)
##  evaluate      0.14    2019-05-28 [1] CRAN (R 3.6.0)
##  fansi         0.4.1   2020-01-08 [1] CRAN (R 3.6.0)
##  farver        2.0.3   2020-01-16 [1] CRAN (R 3.6.0)
##  fastmap       1.0.1   2019-10-08 [1] CRAN (R 3.6.0)
##  forcats     * 0.5.0   2020-03-01 [1] CRAN (R 3.6.2)
##  fs            1.3.2   2020-03-05 [1] CRAN (R 3.6.2)
##  generics      0.0.2   2018-11-29 [1] CRAN (R 3.6.0)
##  gganimate   * 1.0.5   2020-02-09 [1] CRAN (R 3.6.0)
##  ggmap       * 3.0.0   2019-02-05 [1] CRAN (R 3.6.0)
##  ggplot2     * 3.3.0   2020-03-05 [1] CRAN (R 3.6.2)
##  gifski        0.8.6   2018-09-28 [1] CRAN (R 3.6.0)
##  glue        * 1.3.1   2019-03-12 [1] CRAN (R 3.6.0)
##  gtable        0.3.0   2019-03-25 [1] CRAN (R 3.6.0)
##  haven         2.2.0   2019-11-08 [1] CRAN (R 3.6.0)
##  hms           0.5.3   2020-01-08 [1] CRAN (R 3.6.0)
##  htmltools     0.4.0   2019-10-04 [1] CRAN (R 3.6.0)
##  htmlwidgets   1.5.1   2019-10-08 [1] CRAN (R 3.6.0)
##  httpuv        1.5.2   2019-09-11 [1] CRAN (R 3.6.1)
##  httr          1.4.1   2019-08-05 [1] CRAN (R 3.6.0)
##  janitor       1.2.1   2020-01-22 [1] CRAN (R 3.6.0)
##  jpeg          0.1-8.1 2019-10-24 [1] CRAN (R 3.6.0)
##  jsonlite      1.6.1   2020-02-02 [1] CRAN (R 3.6.0)
##  knitr         1.28    2020-02-06 [1] CRAN (R 3.6.0)
##  later         1.0.0   2019-10-04 [1] CRAN (R 3.6.0)
##  lattice       0.20-40 2020-02-19 [1] CRAN (R 3.6.0)
##  lazyeval      0.2.2   2019-03-15 [1] CRAN (R 3.6.0)
##  lifecycle     0.2.0   2020-03-06 [1] CRAN (R 3.6.0)
##  lubridate   * 1.7.4   2018-04-11 [1] CRAN (R 3.6.0)
##  magrittr      1.5     2014-11-22 [1] CRAN (R 3.6.0)
##  mime          0.9     2020-02-04 [1] CRAN (R 3.6.0)
##  modelr        0.1.6   2020-02-22 [1] CRAN (R 3.6.0)
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 3.6.0)
##  nlme          3.1-145 2020-03-04 [1] CRAN (R 3.6.2)
##  pillar        1.4.3   2019-12-20 [1] CRAN (R 3.6.0)
##  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 3.6.0)
##  plotly      * 4.9.2   2020-02-12 [1] CRAN (R 3.6.0)
##  plyr          1.8.6   2020-03-03 [1] CRAN (R 3.6.2)
##  png           0.1-7   2013-12-03 [1] CRAN (R 3.6.0)
##  prettyunits   1.1.1   2020-01-24 [1] CRAN (R 3.6.0)
##  progress      1.2.2   2019-05-16 [1] CRAN (R 3.6.0)
##  promises      1.1.0   2019-10-04 [1] CRAN (R 3.6.0)
##  purrr       * 0.3.3   2019-10-18 [1] CRAN (R 3.6.0)
##  R6            2.4.1   2019-11-12 [1] CRAN (R 3.6.0)
##  Rcpp          1.0.3   2019-11-08 [1] CRAN (R 3.6.0)
##  readr       * 1.3.1   2018-12-21 [1] CRAN (R 3.6.0)
##  readxl        1.3.1   2019-03-13 [1] CRAN (R 3.6.0)
##  reprex        0.3.0   2019-05-16 [1] CRAN (R 3.6.0)
##  RgoogleMaps   1.4.5.3 2020-02-12 [1] CRAN (R 3.6.0)
##  rjson         0.2.20  2018-06-08 [1] CRAN (R 3.6.0)
##  rlang         0.4.5   2020-03-01 [1] CRAN (R 3.6.2)
##  rmarkdown     2.1     2020-01-20 [1] CRAN (R 3.6.0)
##  rstudioapi    0.11    2020-02-07 [1] CRAN (R 3.6.0)
##  rvest       * 0.3.5   2019-11-08 [1] CRAN (R 3.6.0)
##  scales        1.1.0   2019-11-18 [1] CRAN (R 3.6.0)
##  selectr       0.4-2   2019-11-20 [1] CRAN (R 3.6.0)
##  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 3.6.0)
##  shiny         1.4.0   2019-10-10 [1] CRAN (R 3.6.0)
##  snakecase     0.11.0  2019-05-25 [1] CRAN (R 3.6.0)
##  sp            1.4-1   2020-02-28 [1] CRAN (R 3.6.0)
##  stringi       1.4.6   2020-02-17 [1] CRAN (R 3.6.1)
##  stringr     * 1.4.0   2019-02-10 [1] CRAN (R 3.6.0)
##  tibble      * 2.1.3   2019-06-06 [1] CRAN (R 3.6.0)
##  tidyr       * 1.0.2   2020-01-24 [1] CRAN (R 3.6.0)
##  tidyselect    1.0.0   2020-01-27 [1] CRAN (R 3.6.0)
##  tidyverse   * 1.3.0   2019-11-21 [1] CRAN (R 3.6.0)
##  tweenr        1.0.1   2018-12-14 [1] CRAN (R 3.6.0)
##  vctrs         0.2.4   2020-03-10 [1] CRAN (R 3.6.2)
##  viridisLite   0.3.0   2018-02-01 [1] CRAN (R 3.6.0)
##  withr         2.1.2   2018-03-15 [1] CRAN (R 3.6.0)
##  xfun          0.12    2020-01-13 [1] CRAN (R 3.6.0)
##  xml2        * 1.2.4   2020-03-10 [1] CRAN (R 3.6.2)
##  xtable        1.8-4   2019-04-21 [1] CRAN (R 3.6.0)
##  yaml          2.2.1   2020-02-01 [1] CRAN (R 3.6.1)
## 
## [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library