# Load libraries ----------------------------------------------------------
library(dplyr)
library(tidyr)
library(sbtools)
library(stringr)
library(worrms)
# Read data from ScienceBase ----------------------------------------------
# link to data: https://www.sciencebase.gov/catalog/item/5a709594e4b0a9a2e9d88e4e
sb_id <- '5a709594e4b0a9a2e9d88e4e'
sbtools::item_get(sb_id = sb_id)
sb_filenames <- item_list_files(sb_id = sb_id)
BTA <-readr::read_csv(file = sb_filenames$url[1])
Infauna <- readr::read_csv(file = sb_filenames$url[2])
SedChem <- readr::read_csv(file = sb_filenames$url[3])
# Revise AphiaID ----------------------------------------------------------
# Manually add AphiaID for Caridea
# Although NA in original data, it exists in WoRMS as an infraorder
Infauna <- Infauna %>%
mutate(AphiaID = case_when(TaxaName == "Caridea" ~ 106674,
TRUE ~ AphiaID)
)
# Occurrence Table ----------
Infauna_Occurrence <- Infauna %>%
# Rename columns
rename(
materialEntityID = SampleID
) %>%
# We have to filter out samples with no individuals since they don't have any occurrences
filter(!TaxaName == "No individuals") %>%
# Use mutate to wrangle and create new columns
# For eventID, I used the same combination of variables that's in `notebook_event`
mutate(
eventDate = DateCollected %>%
as.Date("%m/%d/%Y"),
eventID = paste(Site, eventDate %>% as.character(), materialEntityID,
sep = "_") %>% str_remove_all(pattern = "-"),
occurrenceStatus = "present",
basisOfRecord = "HumanObservation",
verbatimIdentification = TaxaName,
individualCount = Abundance,
associatedTaxa = paste("livesNear:", Coral),
taxonRank = NA,
locality = paste("BOEM Lease Block", Site),
higherGeography = paste("Gulf of Mexico",
paste("BOEM Lease Block",
Site), sep = " | "),
occurrenceRemarks = case_when(Location == "Near" ~ paste("within 1 meter of", Coral),
Location == "Background" ~ paste("14 to 1000 meters away from", Coral)
)
) %>%
# Here we group by materialEntityID to allow us to use row numbers to more easily create unique occurrenceIDs
# In this scenario, we can use row numbers because this is a static dataset, however this is likely not appropriate for continuously growing datasets
group_by(materialEntityID) %>%
mutate(
occurrenceID = paste(materialEntityID, row_number(), sep = "_")
) %>%
ungroup() %>%
# Select columns we want included in the output
select(
eventID,
occurrenceID,
eventDate,
verbatimIdentification,
occurrenceStatus,
basisOfRecord,
individualCount,
associatedTaxa,
occurrenceRemarks,
AphiaID,
TSN,
locality,
higherGeography
)
# Pulling AphiaIDs and adding them to the main table ----------------------
# Here we pull AphiaIDs from WoRMS, using `wm_records` then `lapply` to circumvent limitations on the number of input values
myAphiaID <- Infauna$AphiaID %>% na.omit() %>% unique()
myAphiaID <- lapply(myAphiaID, function(x) wm_record(id = x)) %>%
data.table::rbindlist()
# Create taxanomic table to joing with occurrence table
uniqueAphiaSelectColumns <- select(
.data = myAphiaID,
scientificname,
rank,
kingdom,
phylum,
class,
order,
family,
genus,
lsid,
AphiaID
) %>%
rename(
scientificName = scientificname,
taxonRank = rank,
scientificNameID = lsid
)
# Joining the AphiaID and taxanomic table to our occurrence table by the common term "AphiaID"
Occurrence_Ext <- left_join(Infauna_Occurrence, uniqueAphiaSelectColumns, by = c("AphiaID" = "AphiaID")) %>%
# We tell it to only add the TSN to taxonRemarks if there is a TSN provided
mutate(
taxonRemarks = ifelse(is.na(TSN), NA, paste0("urn:lsid:itis.gov:itis_tsn:", TSN)),
countryCode = "US"
) %>%
subset(select = -c(AphiaID)) %>%
select(eventID,
occurrenceID,
eventDate,
scientificName,
scientificNameID,
taxonRemarks,
everything())
# Exporting the table as a .csv to upload to the IPT ----------------------
# checks if data directory exists and if not, creates it
if(!dir.exists("../data")){
dir.create("data")
}
# exports the table
Occurrence_Ext %>%
write.csv(
paste0(here::here("data", "gomx_sediment_macrofauna_occurrence_"), Sys.Date(), ".csv"),
na = "",
fileEncoding = "UTF-8",
row.names = FALSE
)Occurrence Extension Example Script
This is the script, in its entirety, to create the occurrence extension table from our example dataset:
You can also download the entire script here: notebook_occurrence.R
Portability
We used the renv package to make this script more portable. More information on this package and how it works can be found here.