library(tidyverse)
library(data.table)
library(janitor)
library(tmap)
library(patchwork)
library(sf)
library(ggiraph)
library(viridis) # For scale_fill_viridis_c
# Load the pre-downloaded data.
# NOTE(review): the path is relative, so it is resolved against the working
# directory of whichever process runs this script.
github_data <- readRDS("data/github_innovation_data.rds")
# Unpack the individual tables from the loaded object by element name.
developers <- github_data$developers
git_pushes <- github_data$git_pushes
git_repos <- github_data$git_repos
programming_languages <- github_data$programming_languages
iso3 <- github_data$iso3
# Join keys used as the default `by.vars` of merge_fun_devs() when the
# developers table is merged into the other tables below.
merge_by_devs <- c("iso2_code", "year", "quarter")
#' Merge the developers table with another table on shared key columns
#'
#' @param devs Table of developer counts (left side of the merge).
#' @param df Table to merge with `devs` (right side of the merge).
#' @param by.vars Character vector of key column names present in both tables.
#'   Defaults to the script-level `merge_by_devs`.
#' @return The result of `merge()`, i.e. the rows whose keys occur in both
#'   inputs.
merge_fun_devs <- function(devs, df, by.vars = merge_by_devs) {
  joined <- merge(x = devs, y = df, by = by.vars)
  joined
}
# Attach the developers table to each activity table on the merge_by_devs keys.
# merge() keeps only key combinations present in both inputs by default.
git_pushes <- merge_fun_devs(developers, git_pushes)
git_repos <- merge_fun_devs(developers, git_repos)
programming_languages <- merge_fun_devs(developers, programming_languages)
# Drop the HTML, CSS and Shell rows from the language table.
# The bare `language` column reference relies on data.table `[` semantics,
# so programming_languages must be a data.table at this point.
programming_languages <- programming_languages[!language %in% c("HTML", "CSS", "Shell")]
# Names of the four analysis tables (not referenced again in this part of the script).
github_data_name <- c("developers", "git_pushes", "git_repos", "programming_languages")
# iso3 data is already loaded from the RDS file
# Keep only the two code columns, renamed to match the join keys used below
# (`iso2_code` for the GitHub tables, `iso_a3` for the World layer).
iso3 <- iso3[, .(iso2_code = `alpha-2`, iso_a3 = `alpha-3`)]
data("World") # from tmap
# Add the ISO2 code to every World feature, then keep only the African ones.
africa <- World %>%
left_join(iso3,by = "iso_a3") %>%
filter(continent == "Africa")
# Keep africa as an sf object (do NOT convert to data.table).
# Converting to data.table strips sf attributes like CRS.
# If the layer carries no CRS, assign 4326 (sf reads a numeric CRS as an EPSG code).
if (is.na(sf::st_crs(africa))) {
sf::st_crs(africa) <- 4326
}
#' Find the most recent year and quarter present in a table
#'
#' Works on any data-frame-like object (data.frame, tibble, data.table): the
#' columns are extracted as plain vectors, so the result does not depend on the
#' class-specific behaviour of `[`.
#'
#' @param df Table holding one column of years and one column of quarters.
#' @param year_column Name (string) of the year column.
#' @param quarter_column Name (string) of the quarter column.
#' @param concactenate If `TRUE` (default) return a single string
#'   `"<year>-<quarter>"`; otherwise return a list. (The spelling of the
#'   argument name is kept for backward compatibility.)
#' @return Either a length-one character string, or a list with elements
#'   `latest_year` and `latest_quarter`.
get_latest_year_and_quarter <- function(df, year_column, quarter_column, concactenate = TRUE) {
  missing_cols <- setdiff(c(year_column, quarter_column), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `df`: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  years <- df[[year_column]]
  quarters <- df[[quarter_column]]

  # Latest year with available data
  latest_year <- max(years, na.rm = TRUE)

  # Latest quarter within that year. Index the quarter vector itself rather
  # than `df[mask]`, which selects columns (not rows) on a plain data.frame.
  # which() drops rows whose year is NA.
  latest_quarter <- max(quarters[which(years == latest_year)], na.rm = TRUE)

  if (concactenate) {
    # Single "<year>-<quarter>" string
    paste0(latest_year, "-", latest_quarter)
  } else {
    # Named list with the two components
    list(latest_year = latest_year, latest_quarter = latest_quarter)
  }
}
#' Join the latest quarter of a metrics table onto a spatial layer
#'
#' Keeps the rows of `df` that belong to the most recent year/quarter (as
#' reported by `get_latest_year_and_quarter()`), left-joins them onto
#' `spatial_df` by `iso2_column_name`, replaces missing counts with 0 and adds
#' a per-capita rate column.
#'
#' @param df Table with at least the ISO2, year, quarter and (normally) the
#'   `freq_column` columns. It is not modified.
#' @param spatial_df Spatial layer; converted with `sf::st_as_sf()` if it is
#'   not already an `sf` object.
#' @param iso2_column_name Name of the join key present in both inputs.
#' @param year_column,quarter_column Names of the period columns in `df`.
#' @param pop_est_column Name of the population column used as denominator.
#' @param freq_column Name of the count column used as numerator.
#' @param geometry_column Currently unused; kept so existing calls keep working.
#' @param group_col Optional name of a grouping column in `df`. When given,
#'   rows with a missing group are dropped and the spatial layer is joined
#'   once per group value, with the results stacked. The group column of the
#'   result holds the group labels as character strings.
#' @param per Multiplier for the rate (default: per 100,000 people).
#' @param round_digits Number of digits the rate is rounded to.
#' @return `spatial_df` left-joined with the latest-quarter rows, with NA
#'   counts set to 0 and a `devs_per_100k` column (the column keeps this name
#'   whatever the value of `per`).
process_df_to_sf <- function(df, spatial_df,
                             iso2_column_name = "iso2_code",
                             year_column = "year",
                             quarter_column = "quarter",
                             pop_est_column = "pop_est",
                             freq_column = "developers",
                             geometry_column = "geometry",
                             group_col = NULL,
                             per = 100000,
                             round_digits = 0) {
  # Ensure spatial_df is sf so CRS/geometry are preserved through joins
  if (!inherits(spatial_df, "sf")) {
    spatial_df <- sf::st_as_sf(spatial_df)
  }

  # Fail early, with a readable message, when `df` lacks a required column.
  required_cols <- c(iso2_column_name, year_column, quarter_column, group_col)
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `df`: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Work on a data.table copy: setDT() would convert the caller's object
  # by reference.
  dt <- data.table::as.data.table(df)

  # Keep only the latest year/quarter. The mask is built from plain vectors so
  # column names cannot shadow local variables; which() drops NA comparisons.
  latest <- get_latest_year_and_quarter(dt, year_column, quarter_column, concactenate = FALSE)
  is_latest <- dt[[year_column]] == latest$latest_year &
    dt[[quarter_column]] == latest$latest_quarter
  latest_df <- as.data.frame(dt)[which(is_latest), , drop = FALSE]
  rownames(latest_df) <- NULL

  # sf-preserving left join of a set of rows onto the spatial layer
  join_onto_map <- function(rows) {
    dplyr::left_join(spatial_df, rows, by = iso2_column_name)
  }

  if (is.null(group_col)) {
    latest_df_sf <- join_onto_map(latest_df)
  } else {
    latest_df <- latest_df[!is.na(latest_df[[group_col]]), , drop = FALSE]
    per_group <- split(latest_df, latest_df[[group_col]])
    joined_groups <- Map(
      function(rows, group_value) {
        joined <- join_onto_map(rows)
        # Features without a match still need their group label.
        joined[[group_col]] <- group_value
        joined
      },
      per_group,
      names(per_group)
    )
    latest_df_sf <- dplyr::bind_rows(unname(joined_groups))
  }

  # Both the numerator and the denominator must exist after the join.
  absent <- setdiff(c(freq_column, pop_est_column), names(latest_df_sf))
  if (length(absent) > 0) {
    stop(paste0("Expected column '", absent[1], "' not found after join."), call. = FALSE)
  }

  # Fill missing counts and compute the per-capita metric
  counts <- latest_df_sf[[freq_column]]
  counts[is.na(counts)] <- 0
  latest_df_sf[[freq_column]] <- counts

  rate <- round((counts / latest_df_sf[[pop_est_column]]) * per, digits = round_digits)
  rate[is.na(rate)] <- 0
  latest_df_sf$devs_per_100k <- rate

  latest_df_sf
}
#' Draw a filled map with hover tooltips
#'
#' Column arguments use tidy evaluation: pass `fill_var` and `tooltip_var` as
#' bare column names. The tooltip text is built here as
#' `"<tooltip_var>:<fill_var>"`.
#'
#' @param data_sf Data passed to `ggplot()`; drawn with `geom_sf_interactive()`.
#' @param fill_var Bare column name mapped to the fill (a continuous viridis
#'   scale is applied).
#' @param tooltip_var Bare column name used as the label part of the tooltip.
#' @param facet_var Optional column name, as a string, to facet by.
#' @param plot_title Plot title.
#' @param return_girafe If `TRUE` return a `girafe()` widget, otherwise the
#'   ggplot object.
#' @param aspect_ratio Value for the theme's `aspect.ratio`.
#' @param point_size Passed to `girafe()` as `pointsize`.
#' @return A girafe widget or a ggplot object, depending on `return_girafe`.
create_interactive_map <- function(data_sf,
                                   fill_var,
                                   tooltip_var,
                                   facet_var = NULL,
                                   plot_title = "Developers per 100,000 people",
                                   return_girafe = TRUE,
                                   aspect_ratio = 1,
                                   point_size = 9) {
  # Aesthetics: fill colour plus the hover text
  map_aes <- aes(
    fill = {{ fill_var }},
    tooltip = paste0({{ tooltip_var }}, ":", {{ fill_var }})
  )

  map_plot <- ggplot(data_sf, map_aes) +
    geom_sf_interactive() +
    scale_fill_viridis_c() +
    labs(title = plot_title, fill = "") +
    theme_void() +
    theme(
      legend.position = "left",
      legend.key.width = unit(0.1, "cm"),
      plot.title = element_text(size = 11),
      aspect.ratio = aspect_ratio,
      plot.margin = unit(c(0, 0, 0, 0), "cm")
    )

  # Facet only when a column name was supplied; .data[[ ]] looks it up by string
  if (!is.null(facet_var)) {
    map_plot <- map_plot + facet_wrap(vars(.data[[facet_var]]))
  }

  # Static ggplot requested: skip the widget conversion
  if (!return_girafe) {
    return(map_plot)
  }

  girafe(ggobj = map_plot, pointsize = point_size)
}
# Example usage:
# Assuming 'latest_developers_sf' is an sf data frame with a 'devs_per_100k' column (fill) and a 'name' column (tooltip label):
# create_interactive_map(latest_developers_sf, fill_var = devs_per_100k, tooltip_var = name)
# The function itself builds the tooltip as "<tooltip_var>:<fill_var>", so pass only the bare label column.
# To add facets, pass the faceting column name as a string, e.g. facet_var = "region".
library(ggplot2)
library(ggiraph)
library(stringr) # For str_wrap
#' Draw an interactive dodged bar chart
#'
#' Column arguments use tidy evaluation: pass `x_var`, `y_var` and `fill_var`
#' as bare column names. Each bar's tooltip reads
#' `"<x_var>, <fill_var> <y_var>"`.
#'
#' @param data Data frame to plot.
#' @param x_var Bare column name for the x axis (a discrete scale is applied;
#'   labels are wrapped at 10 characters).
#' @param y_var Bare column name for the bar heights (used as-is,
#'   `stat = "identity"`).
#' @param fill_var Bare column name for the fill groups.
#' @param title,xlab,ylab Plot title and axis labels.
#' @param point_size Passed to `girafe()` as `pointsize`. Defaults to 9, the
#'   value that was previously fixed.
#' @return A girafe widget.
plot_interactive_bar <- function(data, x_var, y_var,
                                 fill_var,
                                 title = "Top programming languages by country",
                                 xlab = "Country",
                                 ylab = "% Number of pushers",
                                 point_size = 9) {
  # Wrap long axis labels so they do not overlap
  wrap_labels <- function(x) str_wrap(x, width = 10)

  top_countries_plot <- ggplot(
    data,
    aes(
      x = {{ x_var }}, y = {{ y_var }}, fill = {{ fill_var }},
      tooltip = paste0({{ x_var }}, ", ", {{ fill_var }}, " ", {{ y_var }})
    )
  ) +
    geom_bar_interactive(stat = "identity", position = "dodge", width = 0.5) +
    theme_minimal() +
    labs(
      title = title,
      fill = "",
      y = ylab,
      x = xlab
    ) +
    scale_x_discrete(labels = wrap_labels) +
    scale_fill_brewer(type = "qual", palette = "Dark2") +
    theme(legend.position = "bottom")

  # Return the interactive plot
  girafe(ggobj = top_countries_plot, pointsize = point_size)
}
## Donut chart
#' Draw a donut chart with plotly
#'
#' The plotly functions are namespace-qualified because this script never
#' attaches plotly; a bare `layout()` would otherwise resolve to
#' `graphics::layout()`.
#'
#' @param data Data frame holding the labels and values.
#' @param labelColumn Name (string) of the column with the slice labels.
#' @param valueColumn Name (string) of the column with the slice values.
#' @param chartTitle Chart title.
#' @return A plotly object: a pie trace with a 0.6 hole and the axis grid,
#'   zero line and tick labels hidden.
donhunt_chart <- function(data, labelColumn, valueColumn, chartTitle) {
  missing_cols <- setdiff(c(labelColumn, valueColumn), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Axes carry no information on a pie/donut, so hide all their decorations.
  blank_axis <- list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)

  # The formulas are evaluated against `data`; get() resolves the column whose
  # name is stored in the string argument.
  chart <- plotly::plot_ly(data, labels = ~ get(labelColumn), values = ~ get(valueColumn))
  chart <- plotly::add_pie(chart, hole = 0.6)
  plotly::layout(
    chart,
    title = chartTitle,
    xaxis = blank_axis,
    yaxis = blank_axis
  )
}