# Load libraries ----------------------------------------------------------
library(dplyr)
library(tidyr)
library(sbtools)
library(stringr)
library(worrms)
# Read data from ScienceBase ----------------------------------------------
# link to data: https://www.sciencebase.gov/catalog/item/5a709594e4b0a9a2e9d88e4e
sb_id <- '5a709594e4b0a9a2e9d88e4e'
sbtools::item_get(sb_id = sb_id)

sb_filenames <- item_list_files(sb_id = sb_id)

BTA <- readr::read_csv(file = sb_filenames$url[1])
Infauna <- readr::read_csv(file = sb_filenames$url[2])
SedChem <- readr::read_csv(file = sb_filenames$url[3])
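# Optional sanity check (a sketch, assuming `sb_filenames` includes `fname` and `url`
# columns as returned by item_list_files()): print the file names so the positional
# indexing above (url[1], url[2], url[3]) can be confirmed against the actual files.
print(sb_filenames[, c("fname", "url")])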
# Revise AphiaID ----------------------------------------------------------
# Manually add AphiaID for Caridea
# Although NA in original data, it exists in WoRMS as an infraorder
Infauna <- Infauna %>%
  mutate(AphiaID = case_when(TaxaName == "Caridea" ~ 106674,
                             TRUE ~ AphiaID))
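# If you prefer not to hard-code the AphiaID, a minimal check (a sketch that assumes
# worrms::wm_name2id() is available and that 106674 is still the current ID for
# Caridea) is to look the name up and compare:
caridea_id <- worrms::wm_name2id(name = "Caridea")
stopifnot(caridea_id == 106674)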
# Occurrence Table ----------
Infauna_Occurrence <- Infauna %>%
  # Rename columns
  rename(
    materialEntityID = SampleID
  ) %>%
  # We have to filter out samples with no individuals since they don't have any occurrences
  filter(!TaxaName == "No individuals") %>%
  # Use mutate to wrangle and create new columns
  # For eventID, I used the same combination of variables that's in `notebook_event`
  mutate(
    eventDate = DateCollected %>% as.Date("%m/%d/%Y"),
    eventID = paste(Site, eventDate %>% as.character(), materialEntityID,
                    sep = "_") %>% str_remove_all(pattern = "-"),
    occurrenceStatus = "present",
    basisOfRecord = "HumanObservation",
    verbatimIdentification = TaxaName,
    individualCount = Abundance,
    associatedTaxa = paste("livesNear:", Coral),
    taxonRank = NA,
    locality = paste("BOEM Lease Block", Site),
    higherGeography = paste("Gulf of Mexico",
                            paste("BOEM Lease Block", Site),
                            sep = " | "),
    occurrenceRemarks = case_when(Location == "Near" ~ paste("within 1 meter of", Coral),
                                  Location == "Background" ~ paste("14 to 1000 meters away from", Coral))
  ) %>%
  # Here we group by materialEntityID to allow us to use row numbers to more easily create unique occurrenceIDs
  # In this scenario, we can use row numbers because this is a static dataset; however, this is likely not appropriate for continuously growing datasets
  group_by(materialEntityID) %>%
  mutate(
    occurrenceID = paste(materialEntityID, row_number(), sep = "_")
  ) %>%
  ungroup() %>%
  # Select columns we want included in the output
  select(
    eventID,
    occurrenceID,
    eventDate,
    verbatimIdentification,
    occurrenceStatus,
    basisOfRecord,
    individualCount,
    associatedTaxa,
    occurrenceRemarks,
    AphiaID,
    TSN,
    locality,
    higherGeography
  )
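# Quick integrity checks on the new table (a sketch using the column names created
# above): every occurrenceID should be unique, and every record should carry an
# eventID and an eventDate.
stopifnot(!any(duplicated(Infauna_Occurrence$occurrenceID)))
stopifnot(!any(is.na(Infauna_Occurrence$eventID)))
stopifnot(!any(is.na(Infauna_Occurrence$eventDate)))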
# Pulling AphiaIDs and adding them to the main table ----------------------
# Here we pull AphiaIDs from WoRMS, using `wm_record` inside `lapply` to circumvent limitations on the number of input values
myAphiaID <- Infauna$AphiaID %>% na.omit() %>% unique()

myAphiaID <- lapply(myAphiaID, function(x) wm_record(id = x)) %>%
  data.table::rbindlist()
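# A more tolerant variant (a sketch, not part of the original workflow; `aphia_ids`
# and `aphia_records` are illustrative names): if any AphiaID is missing from WoRMS,
# wm_record() errors and stops the whole lapply() call, so wrapping it in tryCatch()
# skips the problem IDs, and rbindlist(fill = TRUE) pads any columns that differ
# between records.
aphia_ids <- Infauna$AphiaID %>% na.omit() %>% unique()
aphia_records <- lapply(aphia_ids, function(x) {
  tryCatch(wm_record(id = x), error = function(e) NULL)
}) %>%
  data.table::rbindlist(fill = TRUE)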
# Create taxonomic table to join with occurrence table
uniqueAphiaSelectColumns <- select(
  .data = myAphiaID,
  scientificname,
  rank,
  kingdom,
  phylum,
  class,
  order,
  family,
  genus,
  lsid,
  AphiaID
) %>%
  rename(
    scientificName = scientificname,
    taxonRank = rank,
    scientificNameID = lsid
  )
# Joining the AphiaID and taxonomic table to our occurrence table by the common term "AphiaID"
Occurrence_Ext <- left_join(Infauna_Occurrence, uniqueAphiaSelectColumns, by = c("AphiaID" = "AphiaID")) %>%
  # We tell it to only add the TSN to taxonRemarks if there is a TSN provided
  mutate(
    taxonRemarks = ifelse(is.na(TSN), NA, paste0("urn:lsid:itis.gov:itis_tsn:", TSN)),
    countryCode = "US"
  ) %>%
  subset(select = -c(AphiaID)) %>%
  select(eventID,
         occurrenceID,
         eventDate,
         scientificName,
         scientificNameID,
         taxonRemarks,
         everything())
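# A quick post-join check (a sketch; it assumes that AphiaIDs with no WoRMS match
# simply come back with NA in scientificName): list any verbatim names that did not
# receive a taxonomic match.
Occurrence_Ext %>%
  filter(is.na(scientificName)) %>%
  distinct(verbatimIdentification)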
# Exporting the table as a .csv to upload to the IPT ----------------------
# checks if the data directory exists and, if not, creates it
if (!dir.exists(here::here("data"))) {
  dir.create(here::here("data"))
}

# exports the table
Occurrence_Ext %>%
  write.csv(
    paste0(here::here("data", "gomx_sediment_macrofauna_occurrence_"), Sys.Date(), ".csv"),
    na = "",
    fileEncoding = "UTF-8",
    row.names = FALSE
  )
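# Optional round-trip check (a sketch; the path is rebuilt the same way as in the
# write.csv() call above): read the exported file back and confirm the row count
# matches the table we wrote out.
exported <- readr::read_csv(
  paste0(here::here("data", "gomx_sediment_macrofauna_occurrence_"), Sys.Date(), ".csv"),
  na = ""
)
stopifnot(nrow(exported) == nrow(Occurrence_Ext))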
Occurrence Extension Example Script

The script above, in its entirety, creates the occurrence extension table from our example dataset. You can also download the entire script here: notebook_occurrence.R

Portability

We used the renv package to make this script more portable. More information on this package and how it works can be found here.
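For reference, the typical renv workflow (a sketch of the package's documented commands; these are run interactively in the project rather than inside the script above) looks like:

# renv::init()      # set up a project-local library and lockfile (renv.lock)
# renv::snapshot()  # record the package versions the script was run with
# renv::restore()   # recreate that library from renv.lock on another machine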