# Load libraries ----------------------------------------------------------
library(dplyr)
library(sbtools)
library(stringr)
# Read data from ScienceBase ----------------------------------------------
# link to data: https://www.sciencebase.gov/catalog/item/5a709594e4b0a9a2e9d88e4e
sb_id <- '5a709594e4b0a9a2e9d88e4e'
sbtools::item_get(sb_id = sb_id)

sb_filenames <- item_list_files(sb_id = sb_id)

BTA <- readr::read_csv(file = sb_filenames$url[1])
Infauna <- readr::read_csv(file = sb_filenames$url[2])
SedChem <- readr::read_csv(file = sb_filenames$url[3])
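
# The three read_csv() calls above assume a fixed file order on ScienceBase;
# printing the file names (item_list_files() returns fname/size/url columns)
# is a quick way to confirm which URL maps to which table
sb_filenames$fname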
# Sediment Core Event Table -------------------------------------------------
# these events represent unique sediment cores (according to the CoreID provided by the researchers)
# create table with the sediment cores' min/max values to join later with events
core_minmax <- Infauna %>%
# Combine Infauna with SedChem
bind_rows(SedChem) %>%
# Rename some columns to DwC terms
rename(
materialEntityID = CoreID,
locationID = Station,
decimalLatitude = Latitude,
decimalLongitude = Longitude,
eventRemarks = EnvironmentalGroup
  ) %>%
  # For eventID, create a unique ID for each coring event by combining several other fields
mutate(
eventDate = DateCollected %>%
as.Date("%m/%d/%Y"),
eventID = paste(Site, eventDate %>% as.character(), materialEntityID,
sep = "_") %>% stringr::str_remove_all(pattern = "-")) %>%
# Fractions (in the original data) represent the depth of the core samples relative to the sea floor
# We use `group_by` so that we can pull the min/max values of fraction for min/max depths of each core
group_by(eventID) %>%
  # each core yields two Fraction values (top and bottom of the range), so we
  # use reframe(), dplyr's multi-row replacement for summarise()
  reframe(Fraction = str_remove_all(Fraction, pattern = " cm") %>%
str_split(pattern = "-") %>%
unlist() %>%
as.integer()) %>%
group_by(eventID) %>%
summarise(maximumDistanceAboveSurfaceInMeters = max(Fraction) %>%
as.numeric()/-100,
minimumDistanceAboveSurfaceInMeters = min(Fraction) %>%
as.numeric()/-100) %>%
# Remove duplicate rows
distinct()
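
# A minimal sketch (toy value, not from the dataset) of the Fraction parsing
# above: "0-2 cm" becomes the integers 0 and 2, which the final summarise()
# turns into negative meters below the sea floor
"0-2 cm" %>%
  str_remove_all(pattern = " cm") %>%
  str_split(pattern = "-") %>%
  unlist() %>%
  as.integer() / -100
#> [1]  0.00 -0.02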
# create event table without sediment core min/max values
SedChem <- SedChem %>%
  mutate(
    Gear = "Push core")

Infauna_StationCore <- Infauna %>%
  bind_rows(SedChem) %>%
rename(
materialEntityID = CoreID,
locationID = Station,
decimalLatitude = Latitude,
decimalLongitude = Longitude,
eventRemarks = EnvironmentalGroup
  ) %>%
mutate(
    locationRemarks = case_when(Location == "Near" ~ paste("within 1 meter of", Coral),
                                Location == "Background" ~ paste("14 to 1000 meters away from", Coral)),
    geodeticDatum = "WGS84",
eventDate = DateCollected %>%
as.Date("%m/%d/%Y"),
eventID = paste(Site, eventDate %>% as.character(), materialEntityID,
sep = "_") %>% stringr::str_remove_all(pattern = "-"),
minimumDepthInMeters = Depth,
maximumDepthInMeters = Depth,
countryCode = "US",
locality = paste("BOEM Lease Block", Site),
    higherGeography = paste("Gulf of Mexico",
                            paste("BOEM Lease Block",
                                  Site),
                            sep = " | ")) %>%
# Select columns we want included in the output
select(
eventID,
eventDate,
materialEntityID,
locationID,
decimalLatitude,
decimalLongitude,
higherGeography,
locality,
geodeticDatum,
countryCode,
minimumDepthInMeters,
maximumDepthInMeters,
locationRemarks,
    Gear
  ) %>%
# joining the event table and the table with the min/max core depths
distinct() %>%
left_join(., core_minmax) %>%
  mutate(
    samplingProtocol = paste0(Gear, ", ", maximumDistanceAboveSurfaceInMeters, " m long")
  ) %>%
select(everything(),
-Gear)
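
# Illustrative eventID construction with toy values (not real site/core IDs):
# the hyphens in the ISO date are stripped so the ID stays compact
paste("Site1", as.Date("06/14/2012", "%m/%d/%Y") %>% as.character(), "Core07",
      sep = "_") %>%
  stringr::str_remove_all(pattern = "-")
#> [1] "Site1_20120614_Core07"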
# Sample Level Event Table -------------------------------------------------
# Here we are making an event table for the samples taken within each core, which are child events
Infauna_Sample <- Infauna %>%
rename(
materialEntityID = SampleID,
locationID = Station,
decimalLatitude = Latitude,
decimalLongitude = Longitude
  ) %>%
# the eventID from the previous table is now the parentEventID, and a new eventID is made for the samples
# In creating the columns below, we rely heavily on the stringr package, which has many functions for parsing strings and uses regular expressions (regex)
mutate(
locationRemarks = paste(Location, "coral"),
geodeticDatum = "WGS84",
eventDate = DateCollected %>%
as.Date("%m/%d/%Y"),
parentEventID = paste(Site, eventDate %>% as.character(), CoreID,
sep = "_") %>% stringr::str_remove_all(pattern = "-"),
    eventID = paste(Site, eventDate %>% as.character(), materialEntityID, sep = "_") %>%
      stringr::str_remove_all(pattern = "-"),
    minimumDepthInMeters = Depth,
maximumDepthInMeters = Depth,
countryCode = "US",
locality = paste("BOEM Lease Block", Site),
    higherGeography = paste("Gulf of Mexico",
                            paste("BOEM Lease Block",
                                  Site),
                            sep = " | "),
    samplingProtocol = paste(Gear, "fraction"),
# Because we want the depths of core samples, we don't group by core
Fraction = str_extract(Fraction, pattern= ".*\\d"),
maximumDistanceAboveSurfaceInMeters = str_split_i(Fraction, pattern = "-", i = 2) %>%
as.integer()/-100,
minimumDistanceAboveSurfaceInMeters = str_split_i(Fraction, pattern = "-", i = 1) %>%
as.integer()/-100
  ) %>%
select(
everything(),
-c(Analysis,
EnvironmentalGroup,
TSN,
AphiaID,
Abundance,
       TaxaName)
  ) %>%
  distinct()
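
# Toy example of the per-sample depth logic above: str_extract() trims the
# units, then str_split_i() (stringr >= 1.5.0) picks one side of the range
f <- str_extract("0-2 cm", pattern = ".*\\d")    # "0-2"
str_split_i(f, pattern = "-", i = 2) %>% as.integer() / -100
#> [1] -0.02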
# Bind tables ----------------------------------------
Infauna_Event <- bind_rows(Infauna_StationCore, Infauna_Sample) %>%
select(
eventID,
parentEventID,
eventDate,
locationID,
decimalLatitude,
decimalLongitude,
higherGeography,
locality,
geodeticDatum,
countryCode,
minimumDepthInMeters,
maximumDepthInMeters,
samplingProtocol,
locationRemarks,
minimumDistanceAboveSurfaceInMeters,
maximumDistanceAboveSurfaceInMeters,
    materialEntityID
  ) %>%
distinct() %>%
mutate(
minimumDistanceAboveSurfaceInMeters = sprintf("%.2f", minimumDistanceAboveSurfaceInMeters),
maximumDistanceAboveSurfaceInMeters = sprintf("%.2f", maximumDistanceAboveSurfaceInMeters)
)
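
# sprintf() keeps the trailing zeros that plain character conversion would
# drop, so the exported distances are consistently formatted (toy value):
sprintf("%.2f", -0.1)
#> [1] "-0.10"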
# Exporting the table as a .csv to upload to the IPT ----------------------
# checks if data directory exists and if not, creates it
if(!dir.exists(here::here("data"))){
  dir.create(here::here("data"))
}
# exports the table
Infauna_Event %>%
  write.csv(
    paste0(here::here("data", "gomx_sediment_macrofauna_event_"), Sys.Date(), ".csv"),
    na = "",
    fileEncoding = "UTF-8",
    row.names = FALSE
  )
Event Core Example Script

The script above, in its entirety, creates the event core table from our example dataset. You can also download the entire script here: notebook_event.R

Portability

We used the renv package to make this script more portable. More information on this package and how it works can be found here.
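
A minimal sketch of the standard renv workflow (the function names are renv's own; when and where to run them is up to the project):

renv::init()      # create a project-local library and lockfile
renv::snapshot()  # record the exact package versions this script ran with
renv::restore()   # reinstall those versions on another machine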