Hi
I did update the code from your workshop (https://butterfly-monitoring.github.io/bms_workshop/index.html) to use teh new terra package and discard the use of raster and rasterVis. I did also correct some small issues in your code taht were causing errors. Hope this is usefull
regards
if (!require("data.table")){
install.packages("data.table")
library(data.table)}
if (!require("speedglm")){
install.packages("speedglm")
library(speedglm)}
if (!require("devtools")){
install.packages("devtools")
library(devtools)}
if (!require("ggplot2")){
install.packages("ggplot2")
library(ggplot2)}
if (!require("reshape2")){
install.packages("reshape2")
library(reshape2)}
if (!require("plyr")){
install.packages("plyr")
library(plyr)}
if (!require("sf")){
install.packages("sf")
library(sf)}
if (!require("terra")) {
install.packages("terra")
library(terra)}
if (!require("tmap")){
remotes::install_github('r-tmap/tmap')
##install.packages("tmap")
library(tmap)}
if (!require("DT")){
install.packages("DT")
library(DT)}
rbms from GitHub
if(!requireNamespace("devtools")) {
install.packages("devtools")
devtools::install_github("RetoSchmucki/rbms")
library(rbms)}
library(data.table)
load .rds data, we use the function readRDS()
b_count_sub <- readRDS("bms_workshop_data/work_count.rds") # species data with counts
m_visit_sub <- readRDS("bms_workshop_data/work_visit.rds") # date and time of transects/stations
m_site_sub <- readRDS("bms_workshop_data/work_site.rds") # km coordinates of sites
to read csv into a data.table we use fread()
bcz_class <- data.table::fread("bms_workshop_data/GEnS_v3_classification.csv") # Bioclimate classes
b_count_sub
Extract columns, to store only 'bms_id', the reference to samples, teh year of the record and the full date
my_new_dt <- b_count_sub[ , .(bms_id, transect_id_serial, year, visit_date)]
my_new_dt
Extract unique
unique(my_new_dt$bms_id) # store the unique 'bms_id'
unique(my_new_dt[, .(bms_id, year)]) # store 'bms_id' and 'year'
Subset
my_new_dt[bms_id == "NLBMS", ]
Work with date
my_new_dt[ , month := month(visit_date)][ , c("day", "year") := .(mday(visit_date), year(visit_date))]
my_new_dt
Count object
my_new_dt[ , .N, by = bms_id]
rename column
change names
setnames(b_count_sub, "transect_id_serial", "SITE_ID")
setnames(b_count_sub, c("species_name", "bms_id"), c("SPECIES", "BMS_ID"))
names(b_count_sub) <- toupper(names(b_count_sub))
b_count_sub
merge data set
merge species count data with site coordinate data based on a common identifier (SITE_ID),
demonstrating both inner and left joins.
setnames(m_site_sub, "transect_id_serial", "SITE_ID")
setkey(b_count_sub, SITE_ID)
setkey(m_site_sub, SITE_ID)
merge(b_count_sub, m_site_sub[bms_id == "FRBMS" , .(SITE_ID, transect_lon_1km, transect_lat_1km)])
merge(b_count_sub, m_site_sub[bms_id == "FRBMS" , .(SITE_ID, transect_lon_1km, transect_lat_1km)], all.x = TRUE)
data_m <- merge(b_count_sub, m_site_sub[ , .(SITE_ID, transect_lon_1km, transect_lat_1km)], all.x = TRUE)
data_m
Mapping BMS sites
library(data.table)
library(sf)
library(tmap)
b_count_sub <- readRDS("bms_workshop_data/work_count.rds")
m_visit_sub <- readRDS("bms_workshop_data/work_visit.rds")
m_site_sub <- readRDS("bms_workshop_data/work_site.rds")
Mapping
bcz <- terra::rast("bms_workshop_data/metzger_bcz_genz3.grd")
bb <- sf::st_read("bms_workshop_data/bbox.shp")
bcz_class <- data.table::fread("bms_workshop_data/GEnS_v3_classification.csv")
sf transect location
site_sf <- st_as_sf(m_site_sub, coords = c("transect_lon_1km", "transect_lat_1km"), crs = 3035)
mapview::mapview(site_sf)
To produce a map with the Bioclimate Region, we use the function levelplot() in the rasterVis package
plot point on raster
site_sf_4326 <- st_transform(site_sf, crs = 4326)
bb_4326 <- st_transform(bb, crs = 4326)
Begin the tmap visualization
tm_layout(frame = FALSE) + # Optional: Adjust layout settings as needed
tm_shape(bcz) +
tm_raster(style = "cont", palette = "viridis", title = "Bioclimate") +
tm_shape(site_sf_4326) +
tm_dots(col = "cyan4", size = 0.1) + # Adjust size as needed
tm_shape(bb_4326) +
tm_borders(col = "magenta", lwd = 2, lty = 2)
plot point on raster using tmap
tm_shape(bcz) +
tm_raster(palette = "viridis", alpha = 1) + # Example using a viridis palette
tm_shape(st_geometry(st_transform(site_sf, 4326))) +
tm_symbols(col = "dodgerblue4", size = 0.1, shape = 20) +
tm_shape(st_geometry(st_transform(bb, 4326))) +
tm_borders(lty = 2, col= 'magenta', lwd=2)
Extracting spatial information
site_dt <- data.table(site_sf_4326)
Convert 'site_sf' to 'data.table', ensuring to include all attributes
site_dt <- data.table::setDT(sf::st_drop_geometry(site_sf_4326))
print(names(site_sf_4326))
Assuming 'bcz' is a SpatRaster object loaded with terra
Transform 'site_sf' to match the CRS of 'bcz'
site_sf_4326 <- st_transform(site_sf_4326, crs = crs(bcz))
Assuming 'site_sf_4326' is correctly transformed
Extract values
extracted_values <- terra::extract(bcz, site_sf_4326, exact=TRUE)
Ensure extracted_values is correctly structured for assignment
If bcz has a single layer, extracted_values should be a vector or a single-column data.frame
if (is.data.frame(extracted_values)) {
# Assuming we're interested in the first layer or there's only one
site_dt$GEnZ_seq <- extracted_values[[1]]
} else {
# If it's not a data.frame, it should be directly assignable, but this condition is just for illustration
site_dt$GEnZ_seq <- extracted_values
}
----
setkey(site_dt, GEnZ_seq)
setkey(bcz_class, GEnZ_seq)
site_dt <- merge(site_dt, unique(bcz_class[, .(GEnZ_seq, GEnZname)]), all.x=TRUE)
After extraction and before merge
print(names(site_dt))
Exploring results
library(DT)
datatable(site_dt[ ,.(bms_id, transect_id_serial, GEnZ_seq, GEnZname)], rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))
---
mapview::mapview(bcz) + mapview::mapview(st_transform(site_sf, 4326))
Phenology in BMS data - flight curve
if(!requireNamespace("devtools")) install.packages("devtools")
devtools::install_github("RetoSchmucki/rbms")
library(data.table)
library(rbms)
b_count_sub <- readRDS("bms_workshop_data/work_count.rds")
m_visit_sub <- readRDS("bms_workshop_data/work_visit.rds")
Explore the data
library(DT)
datatable(b_count_sub[1:50, ], rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))
datatable(m_visit_sub[1:50, ], rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))
unique(b_count_sub[ , .(bms_id, species_name)])
unique(b_count_sub[order(year), year])
1. Select a single species
s_sp <- "Polyommatus icarus"
region_bms <- c("NLBMS", "FRBMS")
data(m_visit)
data(m_count)
2. Arrange data set name to fit the functions requirements
setnames(m_visit_sub, c("transect_id_serial", "visit_date"), c("SITE_ID", "DATE"))
names(m_visit_sub) <- toupper(names(m_visit_sub))
setnames(b_count_sub, c("transect_id_serial", "visit_date", "species_name"), c("SITE_ID", "DATE", "SPECIES"))
names(b_count_sub) <- toupper(names(b_count_sub))
3. Prepare a full time-series to align count, visit, monitoring and flight curve estimate,
using the function ts_dwmy_table(), with 'InitYear', 'LastYear' and 'WeekDay1'
ts_date <- rbms::ts_dwmy_table(InitYear = 2008,
LastYear = 2017,
WeekDay1 = "monday")
4. Define the monitoring season with the function ts_monit_season()(), using ts_date, StartMonth, EndMonth, StartDay,
EndDay, CompltSeason, Anchor, AnchorLength, AnchorLag, TimeUnit (“d” or “w”) as arguments
ts_season <- rbms::ts_monit_season(ts_date,
StartMonth = 4,
EndMonth = 9,
StartDay = 1,
EndDay = NULL,
CompltSeason = TRUE,
Anchor = TRUE,
AnchorLength = 2,
AnchorLag = 2,
TimeUnit = "w")
5. Align the site visit in the region of interest with your monitoring season
MY_visit_region <- m_visit_sub[BMS_ID %in% region_bms, ]
ts_season_visit <- rbms::ts_monit_site(ts_season, MY_visit_region)
6. Align the butterfly counts recorded for the species and region of interest with your visit and monitoring season
MY_count_region <- b_count_sub[BMS_ID %in% region_bms, ]
ts_season_count <- rbms::ts_monit_count_site(ts_season_visit, MY_count_region, sp = s_sp)
7. Estimate the flight curve for a species and region of interest,
using cubic spline smoother available in the mgcv package and considering
ts_flight_curve <- rbms::flight_curve(ts_season_count,
NbrSample = 500,
MinVisit = 3,
MinOccur = 2,
MinNbrSite = 5,
MaxTrial = 4,
GamFamily = "nb",
SpeedGam = FALSE,
CompltSeason = TRUE,
SelectYear = NULL,
TimeUnit = "w"
)
saveRDS(ts_flight_curve, file.path("bms_workshop_data", paste(gsub(" ", "", s_sp),
paste(region_bms, collapse=""),
"pheno.rds", sep = "_")))
8. Extract and plot the estimated phenology
ts_flight_curve <- readRDS(file.path("bms_workshop_data", paste(gsub(" ", "", s_sp), paste(region_bms, collapse=""), "pheno.rds", sep = "_")))
datatable(ts_flight_curve$pheno[1:370, .(SPECIES, M_YEAR, MONTH, trimWEEKNO, WEEK_SINCE, ANCHOR, NM)], rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))
Plot using base R
Extract phenology part
pheno <- ts_flight_curve$pheno
add the line of the first year
yr <- unique(pheno[order(M_YEAR), as.numeric(as.character(M_YEAR))])
if("trimWEEKNO" %in% names(pheno)){
plot(pheno[M_YEAR == yr[1], trimWEEKNO], pheno[M_YEAR == yr[1], NM], type = 'l', ylim = c(0, max(pheno[, NM])), xlab = 'Monitoring Week', ylab = 'Relative Abundance')
} else {
plot(pheno[M_YEAR == yr[1], trimDAYNO], pheno[M_YEAR == yr[1], NM], type = 'l', ylim = c(0, max(pheno[, NM])), xlab = 'Monitoring Day', ylab = 'Relative Abundance')
}
add individual curves for additional years
if(length(yr) > 1) {
i <- 2
for(y in yr[-1]){
if("trimWEEKNO" %in% names(pheno)){
points(pheno[M_YEAR == y , trimWEEKNO], pheno[M_YEAR == y, NM], type = 'l', col = i)
} else {
points(pheno[M_YEAR == y, trimDAYNO], pheno[M_YEAR == y, NM], type = 'l', col = i)
}
i <- i + 1
}
}
add legend
legend('topright', legend = c(yr), col = c(seq_along(c(yr))), lty = 1, bty = 'n')
Computing site and collated indices
Generalized Abundance Index
library(data.table)
library(rbms)
library(ggplot2)
s_sp <- "Maniola jurtina"
s_sp <- "Polyommatus icarus"
region_bms <- c("NLBMS", "FRBMS")
We will use the count and visit data
b_count_sub <- readRDS("bms_workshop_data/work_count.rds")
m_visit_sub <- readRDS("bms_workshop_data/work_visit.rds")
setnames(m_visit_sub, c("transect_id_serial", "visit_date"), c("SITE_ID", "DATE"))
names(m_visit_sub) <- toupper(names(m_visit_sub))
setnames(b_count_sub, c("transect_id_serial", "visit_date", "species_name"), c("SITE_ID", "DATE", "SPECIES"))
names(b_count_sub) <- toupper(names(b_count_sub))
ts_date <- rbms::ts_dwmy_table(InitYear = 2008,
LastYear = 2017,
WeekDay1 = "monday")
ts_season <- rbms::ts_monit_season(ts_date,
StartMonth = 4,
EndMonth = 9,
StartDay = 1,
EndDay = NULL,
CompltSeason = TRUE,
Anchor = TRUE,
AnchorLength = 2,
AnchorLag = 2,
TimeUnit = "w")
MY_visit_region <- m_visit_sub[BMS_ID %in% region_bms, ]
ts_season_visit <- rbms::ts_monit_site(ts_season, MY_visit_region)
MY_count_region <- b_count_sub[BMS_ID %in% region_bms, ]
ts_season_count <- rbms::ts_monit_count_site(ts_season_visit, MY_count_region, sp = s_sp)
Impute & Site index
impute values for missing week counts
and adequately compute sites abundance index over the entire monitoring season
impt_counts <- rbms::impute_count(ts_season_count = ts_season_count,
ts_flight_curve = pheno,
YearLimit = NULL,
TimeUnit = "w")
sindex <- rbms::site_index(butterfly_count = impt_counts, MinFC = 0.10)
saveRDS(sindex, file.path("bms_workshop_data", paste( gsub(" ", "", s_sp),
paste(region_bms, collapse=""),
"sindex.rds", sep="_")))
Collated index, using the collated_index function in rbms to estimate annual abundance indices for a wider region
we will focus on the sites monitored in the Netherlands (NLBMS) and compute a collated index for Polyommatus icarus
accross the site monitored in that region over the 10-year period.
bms_id <- "NLBMS"
sindex <- readRDS(file.path("bms_workshop_data", paste( gsub(" ", "", s_sp),
paste(region_bms, collapse=""),
"sindex.rds", sep="_")))
sindex[substr(SITE_ID, 1, 5) == bms_id, ]
co_index <- collated_index(data = sindex[substr(SITE_ID, 1, 5) == bms_id, ],
s_sp = s_sp,
sindex_value = "SINDEX",
glm_weights = TRUE,
rm_zero = TRUE)
co_index$col_index
Bootstrap CI
bms_id <- "NLBMS"
set.seed(125361)
bootsample <- rbms::boot_sample(sindex[substr(SITE_ID, 1, 5) == bms_id, ], boot_n = 500)
co_index <- list()
for progression bar, uncomment the following
pb <- txtProgressBar(min = 0, max = dim(bootsample$boot_ind)[1], initial = 0, char = "*", style = 3)
for (i in c(0, seq_len(dim(bootsample$boot_ind)[1]))) {
co_index[[i + 1]] <- rbms::collated_index(data = sindex[substr(SITE_ID, 1, 5) == bms_id, ],
s_sp = s_sp,
sindex_value = "SINDEX",
bootID = i,
boot_ind = bootsample,
glm_weights = TRUE,
rm_zero = TRUE)
## for progression bar, uncomment the following
setTxtProgressBar(pb, i)
}
collate and append all the result in a data.table format
co_index <- rbindlist(lapply(co_index, FUN = "[[", "col_index"))
co_index$BMS_ID <- bms_id
co_index$SPECIES <- s_sp
co_index_boot <- co_index
saveRDS(co_index_boot, file.path("bms_workshop_data", paste( gsub(" ", "", s_sp), bms_id, "co_index_boot.rds", sep="")))
Figure with CI
bms_id <- "NLBMS"
co_index_boot <- readRDS(file.path("bms_workshop_data", paste( gsub(" ", "", s_sp), bms_id, "co_index_boot.rds", sep="")))
boot0 <- co_index_boot[ , mean(COL_INDEX, na.rm=TRUE), by = M_YEAR]
boot0[, LOGDENSITY0 := log(V1) / log(10)]
boot0[, meanlog0 := mean(LOGDENSITY0)]
Add mean of BOOTi == 0 (real data) to all data
boot_ests <- merge(co_index_boot, boot0[, .(M_YEAR, meanlog0)],
by = c("M_YEAR")
)
boot_ests[, LOGDENSITY := log(COL_INDEX) / log(10)]
boot_ests[, TRMOBSCI := LOGDENSITY - meanlog0 + 2]
boot_ests[, TRMOBSCILOW := quantile(TRMOBSCI, 0.025), by = M_YEAR]
boot_ests[, TRMOBSCIUPP := quantile(TRMOBSCI, 0.975), by = M_YEAR]
ylim_ <- c(min(floor(range(boot_ests[TRMOBSCI <= TRMOBSCIUPP & TRMOBSCI >= TRMOBSCILOW, TRMOBSCI]))),
max(ceiling(range(boot_ests[TRMOBSCI <= TRMOBSCIUPP & TRMOBSCI >= TRMOBSCILOW, TRMOBSCI]))))
plot_col <- ggplot(data = boot_ests[TRMOBSCI <= TRMOBSCIUPP & TRMOBSCI >= TRMOBSCILOW, ], aes(x = M_YEAR, y = TRMOBSCI)) +
geom_point(colour = "orange", alpha = 0.2) + ylim(ylim_) +
geom_line(data = boot_ests[TRMOBSCI <= TRMOBSCIUPP & TRMOBSCI >= TRMOBSCILOW, median(TRMOBSCI, na.rm=TRUE), by = M_YEAR], aes(x = M_YEAR, y = V1), linewidth = 1, colour = "dodgerblue4") +
geom_hline(yintercept = 2, linetype = "dashed", color = "grey50") +
scale_x_continuous(minor_breaks = waiver(), limits = c(2005, 2020), breaks = seq(2005, 2020, by = 5)) +
xlab("Year") + ylab("Abundance Incices (Log/Log(10))") +
labs(subtitle = paste(s_sp, bms_id, sep = " "))
plot_col
Calculating species trends
source("workshop_functions.R")
Read in collated indices for species and BMS of interest
Specify the species - note the underscore
spp <- "Maniola_jurtina"
Specify the BMS
bms <- "UKBMS"
Read in the bootstrapped collated indices
co_index <- readRDS(paste0("./bms_workshop_data/", spp, "_co_index_boot.rds"))
Filter to a single BMS to focus upon
co_index <- co_index[BMS_ID == bms]
co_index
Convert to log 10 collated indices (TRMOBS)
Only use COL_INDEX larger then 0 for calculation or logdensity and trmobs
co_index[COL_INDEX > 0, LOGDENSITY:= log(COL_INDEX)/log(10)][, TRMOBS := LOGDENSITY - mean(LOGDENSITY) + 2, by = .(BOOTi)]
co_index
Estimate and classify species trends with a confidence interval based on the bootstraps
sp_trend <- estimate_boot_trends(co_index)
sp_trend
Plot the species log collated index with bootstrapped 95% confidence interval and linear trend line (in red)
Calculate mean log index for original data
co_index0_mean <- mean(co_index[BOOTi == 0]$LOGDENSITY, na.rm = TRUE)
Derive interval from quantiles
co_index_ci <- merge(co_index[BOOTi == 0, .(M_YEAR, TRMOBS, BMS_ID)],
co_index[BOOTi != 0, .(
LOWER = quantile(LOGDENSITY - co_index0_mean + 2, 0.025, na.rm = TRUE),
UPPER = quantile(LOGDENSITY - co_index0_mean + 2, 0.975, na.rm = TRUE)),
by = .(BMS_ID, M_YEAR)],
by=c("BMS_ID","M_YEAR"))
ggplot(co_index_ci, aes(M_YEAR, TRMOBS))+
theme(text = element_text(size = 14))+
geom_line()+
geom_point()+
geom_ribbon(aes(ymin = LOWER, ymax = UPPER), alpha = .3)+
geom_smooth(method="lm", se=FALSE, color="red")+
xlab("Year")+ylab(expression('log '['(10)']*' Collated Index'))+
ggtitle(paste("Collated index for", gsub("_", " ", spp), "in", bms))
Multi-species indicators
Read in collated indices for two species and filter to one BMS
bms <- "UKBMS"
co_index <- rbind(readRDS("./bms_workshop_data/Maniola_jurtina_co_index_boot.rds"),
readRDS("./bms_workshop_data/Polyommatus_icarus_co_index_boot.rds"))
co_index <- co_index[BMS_ID == bms]
co_index
Convert to log 10 collated indices (TRMOBS)
co_index[COL_INDEX > 0, LOGDENSITY:= log(COL_INDEX)/log(10)][,TRMOBS := LOGDENSITY - mean(LOGDENSITY) + 2,
by = .(SPECIES, BOOTi)]
co_index
calculate and plot the indicator, where the confidence interval is based on the bootstraps
First we generate the indicator for real data
msi <- produce_indicator0(co_index[BOOTi == 0])
msi
Then generate an indicator for each bootstrap
msi_boot <- produce_indicators_boot(co_index[BOOTi > 0])
str(msi_boot)
msi_boot[,1:5]
add a confidence interval to the indicator based on quantiles from the bootstrapped indicators
msi <- add_indicator_CI(msi, msi_boot)
msi
Plot the indicator
ggplot(msi, aes(year, indicator))+
theme(text = element_text(size = 14))+
geom_ribbon(aes(ymin = LOWsmooth1, ymax = UPPsmooth1), alpha=.2, fill = "blue")+
geom_point(color = "blue")+
geom_line(aes(y = SMOOTH), color = "blue")+
ylim(c(0, max(200,max(msi$UPPsmooth1))))+
ggtitle(paste("Example indicator for", bms, "based on two species"))
Calculate the indicator trend including confidence interval from the bootstraps
msi_trend <- estimate_ind_trends(msi, msi_boot)
msi_trend