This vignettes provides few examples on how to join attribute data from common sources of attribute data. Here we are using data from three different sources of which two are from Statistics Finland PxWeb-api, namely municipality key figures and Paavo (Open data by postal code area). Third is Covid-19 tests and confirmed cases in Finland.
Installation
geofi
can be installed from CRAN using
# install from CRAN
install.packages("geofi")
# Install development version from GitHub
::install_github("ropengov/geofi") remotes
# Let's first create a function that checks if the suggested
# packages are available
<- function(pkgs){
check_namespaces return(all(unlist(sapply(pkgs, requireNamespace,quietly = TRUE))))
}
Municipality data provided by
get_municipalities()
-function contains 77 indicators
variables from each of 309 municipalities. Variables can be used either
for aggregating data or as keys for joining attribute data.
In this first example we join municipality level data from Statistics Finland municipality key figures
library(geofi)
<- get_municipalities(year = 2019)
muni
<- c("pxweb","dplyr","tidyr","janitor","ggplot2")
libs if (check_namespaces(pkgs = libs)) {
library(pxweb)
<-
pxweb_query_list list("Alue 2020"=c("*"),
"Tiedot"=c("*"),
"Vuosi"=c("2019"))
<-
px_raw pxweb_get(url = "https://pxdata.stat.fi/PXWeb/api/v1/en/Kuntien_avainluvut/2020/kuntien_avainluvut_2020_aikasarja.px",
query = pxweb_query_list)
library(dplyr)
library(tidyr)
library(janitor)
library(sf)
<- as_tibble(
px_data as.data.frame(px_raw,
column.name.type = "text",
variable.value.type = "text")
%>% setNames(make_clean_names(names(.))) %>%
) pivot_longer(names_to = "information", values_to = "municipal_key_figures", 3:ncol(.))
px_dataelse {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}#> # A tibble: 12,800 × 4
#> region_2020 year information municipal_key_figures
#> <chr> <chr> <chr> <dbl>
#> 1 WHOLE COUNTRY 2019 degree_of_urbanisation_percent 86.4
#> 2 WHOLE COUNTRY 2019 population 5525292
#> 3 WHOLE COUNTRY 2019 population_change_from_the_previou… 0.1
#> 4 WHOLE COUNTRY 2019 share_of_persons_aged_under_15_of_… 15.8
#> 5 WHOLE COUNTRY 2019 share_of_persons_aged_15_to_64_of_… 62
#> 6 WHOLE COUNTRY 2019 share_of_persons_aged_over_64_of_t… 22.3
#> 7 WHOLE COUNTRY 2019 share_of_swedish_speakers_of_the_p… 5.2
#> 8 WHOLE COUNTRY 2019 share_of_foreign_citizens_of_the_p… 4.8
#> 9 WHOLE COUNTRY 2019 excess_of_births_persons -8336
#> 10 WHOLE COUNTRY 2019 intermunicipal_migration_gain_loss… 0
#> # ℹ 12,790 more rows
Once we have the data in long format we can observe the
region_2020
-column.
if (check_namespaces(pkgs = libs)) {
count(px_data, region_2020)
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}#> # A tibble: 400 × 2
#> region_2020 n
#> <chr> <int>
#> 1 Akaa 32
#> 2 Alajärvi 32
#> 3 Alavieska 32
#> 4 Alavus 32
#> 5 Asikkala 32
#> 6 Askola 32
#> 7 Aura 32
#> 8 Brändö 32
#> 9 Central Finland 32
#> 10 Central Ostrobothnia 32
#> # ℹ 390 more rows
This is not obvious to all, but have the municipality names in
Finnish among other regional breakdowns which allows us to combine the
data with spatial data using
municipality_name_fi
-variable.
if (check_namespaces(pkgs = libs)) {
<- right_join(muni,
map_data
px_data, by = c("municipality_name_fi" = "region_2020"))
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
Now we can plot a map showing
Share of Swedish-speakers of the population, %
and
Share of foreign citizens of the population, %
on two
panels sharing a scale.
if (check_namespaces(pkgs = libs)) {
library(ggplot2)
%>%
map_data filter(grepl("swedish|foreign", information)) %>%
ggplot(aes(fill = municipal_key_figures)) +
geom_sf() +
facet_wrap(~information) +
theme(legend.position = "top")
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
in early 2021 we are still troubled by the COVID-19 and the health authorities are counting infections, deaths and vaccinated. Lets pull the daily data from API and compare the names of the health districts both in COVID-19 data and in municipality division from Statistics Finland.
if (FALSE){
library(readr)
cols(
Area = col_character(),
Time = col_date(format = ""),
val = col_double()
-> cov_cols
)
<- "https://sampo.thl.fi/pivot/prod/en/epirapo/covid19case/fact_epirapo_covid19case.csv?row=dateweek20200101-508804L&column=hcdmunicipality2020-445222L"
thl_korona_api <- httr::status_code(httr::GET(thl_korona_api))
status
<- read_csv2(thl_korona_api, col_types = cov_cols)
xdf_raw <- xdf_raw %>%
xdf # filter(!grepl("Kaikki", Alue)) %>%
rename(date = Time,
shp = Area,
day_cases = val) %>%
group_by(shp) %>%
arrange(shp,date) %>%
filter(!is.na(day_cases)) %>%
mutate(total_cases = cumsum(day_cases)) %>%
ungroup() %>%
group_by(shp) %>%
filter(date == max(date, na.rm = TRUE)) %>%
ungroup()
} <- structure(list(shp = c("Åland", "All areas", "Central Finland Hospital District",
xdf "Central Ostrobothnia Hospital District", "Helsinki and Uusimaa Hospital District",
"Itä-Savo Hospital District", "Kainuu Hospital District", "Kanta-Häme Hospital District",
"Kymenlaakso Hospital District", "Länsi-Pohja Hospital District",
"Lappi Hospital District", "North Karelia Hospital District",
"North Ostrobothnia Hospital District", "North Savo Hospital District",
"Päijät-Häme Hospital District", "Pirkanmaa Hospital District",
"Satakunta Hospital District", "South Karelia Hospital District",
"South Ostrobothnia Hospital District", "South Savo Hospital District",
"Southwest Finland Hospital District", "Vaasa Hospital District"
date = structure(c(18674, 18674, 18674, 18674, 18674, 18674,
), 18674, 18674, 18674, 18674, 18674, 18674, 18674, 18674, 18674,
18674, 18674, 18674, 18674, 18674, 18674, 18674), class = "Date"),
day_cases = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), total_cases = c(120, 51047, 1850, 177,
29519, 141, 227, 873, 854, 478, 465, 516, 2265, 850, 1243,
2662, 671, 348, 534, 573, 4803, 1878)), row.names = c(NA,
-22L), class = c("tbl_df", "tbl", "data.frame"))
%>%
xdf count(shp)
#> # A tibble: 22 × 2
#> shp n
#> <chr> <int>
#> 1 All areas 1
#> 2 Central Finland Hospital District 1
#> 3 Central Ostrobothnia Hospital District 1
#> 4 Helsinki and Uusimaa Hospital District 1
#> 5 Itä-Savo Hospital District 1
#> 6 Kainuu Hospital District 1
#> 7 Kanta-Häme Hospital District 1
#> 8 Kymenlaakso Hospital District 1
#> 9 Lappi Hospital District 1
#> 10 Länsi-Pohja Hospital District 1
#> # ℹ 12 more rows
<- get_municipalities(year = 2021)
muni %>%
muni st_drop_geometry() %>%
count(sairaanhoitop_name_en)
#> sairaanhoitop_name_en n
#> 1 Central Finland Hospital District 21
#> 2 Central Ostrobothnia Hospital District 10
#> 3 Helsinki and Uusimaa Hospital District 24
#> 4 Itä-Savo Hospital District 4
#> 5 Kainuu Hospital District 8
#> 6 Kanta-Häme Hospital District 11
#> 7 Kymenlaakso Hospital District 6
#> 8 Lappi Hospital District 15
#> 9 Länsi-Pohja Hospital District 6
#> 10 North Karelia Hospital District 13
#> 11 North Ostrobothnia Hospital District 29
#> 12 North Savo Hospital District 18
#> 13 Pirkanmaa Hospital District 23
#> 14 Päijät-Häme Hospital District 12
#> 15 Satakunta Hospital District 16
#> 16 South Karelia Hospital District 9
#> 17 South Ostrobothnia Hospital District 18
#> 18 South Savo Hospital District 9
#> 19 Southwest Finland Hospital District 28
#> 20 Vaasa Hospital District 13
#> 21 Åland 16
The names look identical so we can join the two datasets and plot a map.
<- c("ggplot2")
libs if (check_namespaces(pkgs = libs)) {
%>%
muni count(sairaanhoitop_name_en) %>%
left_join(xdf, by = c("sairaanhoitop_name_en" = "shp")) %>%
ggplot(aes(fill = total_cases)) +
geom_sf() +
geom_sf_text(aes(label = paste0(sairaanhoitop_name_en, "\n", total_cases)),
color = "white") +
labs(title = "Number of total COVID-19 cases reported since January 2020",
fill = NULL)
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}
You can download data from Paavo
(Open data by postal code area) using pxweb
-package in
a similar manner as in the first example.
<- c("ggplot2","pxweb","janitor")
libs if (check_namespaces(pkgs = libs)) {
library(pxweb)
# lets get all zipcodes and all variables
<-
pxweb_query_list list("Postinumeroalue"=c("*"),
"Tiedot"=c("he_vakiy"),
"Vuosi"=c("2021"))
<-
px_raw pxweb_get(url = "https://statfin.stat.fi/PXWeb/api/v1/en/Postinumeroalueittainen_avoin_tieto/uusin/paavo_pxt_12ey.px",
query = pxweb_query_list)
<- as_tibble(
px_data as.data.frame(px_raw,
column.name.type = "text",
variable.value.type = "text")
%>% setNames(make_clean_names(names(.)))
) %>%
px_data filter(postal_code_area != "Finland")
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}#> # A tibble: 3,027 × 3
#> postal_code_area year inhabitants_total_he
#> <chr> <chr> <dbl>
#> 1 WHOLE COUNTRY 2021 5548241
#> 2 00100 Helsinki keskusta - Etu-Töölö (Helsinki) 2021 17893
#> 3 00120 Punavuori - Bulevardi (Helsinki) 2021 7230
#> 4 00130 Kaartinkaupunki (Helsinki) 2021 1716
#> 5 00140 Kaivopuisto - Ullanlinna (Helsinki) 2021 7845
#> 6 00150 Punavuori - Eira - Hernesaari (Helsinki) 2021 9368
#> 7 00160 Katajanokka (Helsinki) 2021 4932
#> 8 00170 Kruununhaka (Helsinki) 2021 7191
#> 9 00180 Kamppi - Ruoholahti (Helsinki) 2021 14440
#> 10 00190 Suomenlinna (Helsinki) 2021 694
#> # ℹ 3,017 more rows
Before we can join the data, we must extract the numerical postal
code from postal_code_area
-variable.
<- c("ggplot2","pxweb","janitor")
libs if (check_namespaces(pkgs = libs)) {
$posti_alue <- sub(" .+$", "", px_data$postal_code_area)
px_data
# Lets join with spatial data and plot the area of each zipcode
<- get_zipcodes(year = 2019)
zipcodes19 <- left_join(zipcodes19,
zipcodes_map
px_data)ggplot(zipcodes_map) +
geom_sf(aes(fill = inhabitants_total_he),
color = alpha("white", 1/3)) +
labs(title = "Total number of inhabitants, 2021",
fill = NULL)
else {
} message("One or more of the following packages is not available: ",
paste(libs, collapse = ", "))
}