library(plotly)
library(tidyverse)
library(data.table)
library(sf)
library(DT)
library(tmap)
library(rKenyaCensus)
Kenya Census Data
# kenya_5yr_births <- readGoogleSheet("https://docs.google.com/spreadsheets/d/e/2PACX-1vSf-yNXNk68cIOH0Rb6alaDk9SxKEMm3h6kb2p8jxT8oMdfj4OqUDbs2Ln9OOGGCI9V7SNiZJCDWm4H/pubhtml")
#
# kenya_5yr_births <- cleanGoogleTable(kenya_5yr_births , table = 1, skip = 1 ) %>%
# setDT()
data(V4_T2.40)
= V4_T2.40
kenya_5yr_births setDT(kenya_5yr_births)
<- names(kenya_5yr_births)
old_nms_births <- gsub("\\s", "_", old_nms_births) %>%
new_nms_births tolower()
setnames(kenya_5yr_births, old_nms_births, new_nms_births)
<- c("total", "notified", "not_notified",
numerics_nms "don’t_know", "not_stated", "percent_notified")
:= lapply(.SD, function(x) gsub(",", "", x)), .SDcols = numerics_nms]
kenya_5yr_births[, (numerics_nms) := lapply(.SD, as.numeric), .SDcols = numerics_nms] kenya_5yr_births[, (numerics_nms)
<- st_read("County") kenya_counties
Reading layer `County' from data source
`/home/mburu/r_projects/m-mburu.github.io/kenya_population/County'
using driver `ESRI Shapefile'
Simple feature collection with 47 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 33.91182 ymin: -4.702271 xmax: 41.90626 ymax: 5.430648
Geodetic CRS: WGS 84
ggplot(kenya_counties)+
geom_sf() +
theme_void()
setnames(kenya_counties, "COUNTY", "county")
:= tolower(gsub("\\s", "-", county))]
kenya_5yr_births[, county == "elgeyo-marakwet", county := "keiyo-marakwet" ]
kenya_5yr_births[county == "nairobi-city" , county := "nairobi"]
kenya_5yr_births[county == "tharaka-nithi" , county := "tharaka" ]
kenya_5yr_births[county == "homabay" , county := "homa-bay" ]
kenya_5yr_births[county setDT(kenya_counties)
:= tolower(gsub("\\s", "-", county))]
kenya_counties[, county
<- merge(kenya_counties, kenya_5yr_births, by = "county" )
kenya_births_counties setnames(kenya_births_counties, "percent_notified", "per_cent_in_health_facility")
:= paste(county, " ", per_cent_in_health_facility,"%", sep = "" )]
kenya_births_counties[, county
<- st_set_geometry(kenya_births_counties, "geometry") kenya_births_counties
<- tm_shape(kenya_births_counties)+
map1 tm_borders(col = "gold")+
tm_polygons(col = "per_cent_in_health_facility")+
tm_layout(title = "% Health facility births",
title.position = c(0.3, "top"))
tmap_leaflet(map1)
<- fread("distribution-of-population-by-age-and-sex-kenya-2019-census-volume-iii.csv") kenyan_pop
<- c("Male", "Female", "Intersex")
numerics_nms := lapply(.SD, function(x) gsub(",", "", x)), .SDcols = numerics_nms]
kenyan_pop[, (numerics_nms) := lapply(.SD, as.numeric), .SDcols = numerics_nms]
kenyan_pop[, (numerics_nms) := lapply(.SD, function(x) ifelse(is.na(x), 0, x)), .SDcols = numerics_nms]
kenyan_pop[, (numerics_nms) := gsub("Sep", "09", Age)]
kenyan_pop[, Age := gsub("Oct", "10", Age)]
kenyan_pop[, Age := Reduce(`+`, .SD),.SDcols= numerics_nms] kenyan_pop[,Total
<- kenyan_pop[!grepl("-", Age)]
age_cat
is.na(age_cat)] <- NA
age_cat[#age_cat[,Total:= Reduce(`+`, .SD),.SDcols= numerics_nms]
<- melt(age_cat, id.vars = c("Age", "Total"), variable.name = "Sex")
age_cat_m := round(value/Total*100, 2)] age_cat_m[, Perc
library(ggthemes)
<- age_cat_m[Age != "Total"]
age_cat_m := as.numeric(Age)]
age_cat_m[, Age <- ggplot(age_cat_m, aes(Age, Perc, group = Sex))+
pop_plot geom_line(aes(color = Sex)) +
labs(y = "Percentage of Population")+
scale_x_continuous(breaks = seq(0, 100, by = 10))+
scale_color_viridis_d()+
theme_hc()
ggplotly(pop_plot)
is.na(Age), Age := 100]
age_cat[
:= as.numeric(Age)]
age_cat[, Age := cut(Age,
age_cat[, age_factor breaks = c(0, 5, 15, 20, 30, 40, 50, 65, 100 ),
include.lowest = T,
labels = c("0-5", "6-15", "16-20",
"20-30", "31-40", "41,50]",
"51-65", "> 65"))]
<- age_cat[, .(sum_total = sum(Total)), by = age_factor] %>%
age_cat_sum := round(sum_total/sum(sum_total)*100,2)] .[, Perc
Age Summary Stats
<- age_cat[!is.na(Age)]
age_cat <- list()
list_df for (i in 1:nrow(age_cat)) {
<- data.table (Age = rep(age_cat[i,Age], age_cat[i, Total]))
list_df[[i]]
}
<- rbindlist(list_df)
dt_age
Mean = round(mean(Age),1),
dt_age[, .(Median = median(Age),
First_quartile = quantile(Age, 0.25),
Third_quartile = quantile(Age, 0.75))] %>%
datatable()
library(scales)
ggplot(dt_age, aes(Age)) +
geom_histogram(binwidth = 5) +
labs(y = "Population")+
theme_economist()+
scale_y_continuous(label=comma, breaks = seq(from = 0, to = 6500000, by = 500000))+
labs(title = "Kenyan population 2019 census")+
theme(
axis.text = element_text(size =12, face = "bold"),
axis.title = element_text(size =12, face = "bold"))
#ggplotly(pop_plot2)
library(knitr)
<-fread("distribution-of-households-growing-other-crops-by-type-county-and-sub-county-2019-census-volume-.csv")
crops setnames(crops, "County/Sub County", "county")
<- melt(crops, id.vars = c("county", "Farming"),
crops_m variable.factor = F, variable.name = "crops",
value.name = "households")
<- c("Farming", "households")
nms := lapply(.SD, function(x) gsub(",", "", x)), .SDcols = nms]
crops_m[, (nms) := lapply(.SD, as.numeric), .SDcols = nms]
crops_m[, (nms) := lapply(.SD, function(x) ifelse(is.na(x), 0, x)), .SDcols = nms]
crops_m[, (nms) := round(households/Farming * 100, 2)]
crops_m[, Perc
set.seed(100)
sample(1:nrow(crops_m),20)] %>% datatable() crops_m[
<-crops_m[county == "KENYA"]
kenya_crops
setorder(kenya_crops, -Perc)
datatable(kenya_crops)
<- ggplot(kenya_crops, aes(reorder(crops, Perc), Perc))+
crops_bar geom_bar(stat = "identity")+
theme_economist()+
labs(y = "Percentage of households", x = "Crops",
title = "Percentage of Kenyan households growing crops")+
theme_hc()+
coord_flip()
ggplotly(crops_bar)
#crops_m[, county := tolower(county)]
:= tolower(gsub("\\s", "-", county))]
crops_m[, county
== "elgeyo/marakwet", county := "keiyo-marakwet"]
crops_m[county == "tharaka---nithi", county := "tharaka"]
crops_m[county == "taita/taveta", county := "taita-taveta"]
crops_m[county
<- merge(crops_m, kenya_counties, by = "county")
crops_counties_dat
<- unique(crops_counties_dat, by = c("county","Farming"))
farm
<- farm %>% group_by(county) %>%
farm filter(Farming == max(Farming)) %>%
setDT()
<- merge(crops_counties_dat,
crops_counties_dat
farm[, .(county, Farming)], by = c("county", "Farming"))
<- crops_counties_dat %>%
pop_crops_counties group_by(county) %>%
filter(Perc == max(Perc)) %>%
ungroup()%>%
mutate(county = paste0(county," ", crops," ", Perc, "%"))
<- st_set_geometry(pop_crops_counties, "geometry") pop_crops_counties
Popular crop grown per county Kenya
<- tm_shape(pop_crops_counties)+
map2 tm_borders(col = "gold")+
tm_fill(col = "crops")+
tm_bubbles(size = "Perc", col = "gold")+
tm_layout(main.title = "Popular crop grown per county Kenya",
title.position = c(0.2, "top"))
<- tmap_leaflet(map2)
map2
map2
<- crops_counties_dat %>%
pop_crops_counties_not_maize group_by(county) %>%
filter(crops != "Maize") %>%
filter(Perc == max(Perc)) %>%
ungroup()%>%
mutate(county = paste0(county," ", crops," ", Perc, "%"))
<- st_set_geometry(pop_crops_counties_not_maize, "geometry") pop_crops_counties_not_maize
Popular crop grown per county Kenya apart from maize
<- tm_shape(pop_crops_counties_not_maize)+
map3 tm_borders(col = "gold")+
tm_fill(col = "crops")
#tm_bubbles(size = "Perc", col = "gold")+
# tm_layout(title = "Popular crop grown per county Kenya apart from maize",
# title.position = c(0.2, "top"))
tmap_leaflet(map3)
<- crops_counties_dat %>%
pop_crops_counties_not_mbp group_by(county) %>%
filter(!crops %in% c("Maize", "Beans", "Potatoes")) %>%
filter(Perc == max(Perc)) %>%
ungroup()%>%
mutate(county = paste0(county," ", crops," ", Perc, "%"))
<- st_set_geometry(pop_crops_counties_not_mbp, "geometry") pop_crops_counties_not_mbp
Popular crop grown per county Kenya apart from maize, beans or potatoes
<- tm_shape(pop_crops_counties_not_mbp)+
map4 tm_borders(col = "gold")+
tm_fill(col = "crops")+
#tm_bubbles(size = "Perc", col = "gold")+
tm_layout(main.title = "Popular crop grown per county Kenya apart from maize, beans or potatoes",
title.position = c(0.2, "top"))
tmap_leaflet(map4)
<- fread("distribution-of-households-practicing-agriculture-fishing-and-irrigation-by-county-and-sub-count.csv") kenya_subcounties_agri