corella
corella is an R package that helps users standardize their data using the Darwin Core data standard, used for biodiversity data like species occurrences. corella provides tools to prepare, manipulate and validate data against the standard’s criteria. Once standardized, data can be subsequently shared as a Darwin Core Archive and published to open data infrastructures like the Atlas of Living Australia and GBIF.
If you have any questions or comments, or if you spot any bugs, email us or report an issue for the R package or the Python package on our GitHub page.
library(corella)
# Example data: two bird observations recorded at the same site.
my_data <- tibble::tibble(
  latitude = c(-35.310, -35.273),
  longitude = c(149.125, 149.133),
  species = c("Callocephalon fimbriatum", "Eolophus roseicapilla"),
  location_id = c("ARD001", "ARD001")
)

# Standardise: each set_*() call maps existing columns (or constant values)
# onto Darwin Core terms, and the result is piped into the next step.
my_data |>
  set_occurrences(
    # Unique record ID built from the site code plus a sequential number
    occurrenceID = composite_id(location_id, sequential_id()),
    basisOfRecord = "humanObservation"
  ) |>
  set_coordinates(
    decimalLatitude = latitude,
    decimalLongitude = longitude
  ) |>
  set_locality(
    country = "Australia",
    locality = "Canberra"
  ) |>
  set_scientific_name(
    scientificName = species,
    taxonRank = "species"
  ) |>
  set_taxonomy(
    kingdom = "Animalia",
    # NOTE(review): Aves is a taxonomic class (the phylum for birds is
    # Chordata); value kept as-is so it matches the printed output.
    phylum = "Aves"
  )
# A tibble: 2 × 11
location_id occurrenceID basisOfRecord decimalLatitude decimalLongitude
<chr> <chr> <chr> <dbl> <dbl>
1 ARD001 ARD001-01 humanObservation -35.3 149.
2 ARD001 ARD001-02 humanObservation -35.3 149.
# ℹ 6 more variables: country <chr>, locality <chr>, scientificName <chr>,
# taxonRank <chr>, kingdom <chr>, phylum <chr>
import pandas as pd
import corella
# Example data: two bird observations recorded at the same site.
my_data = pd.DataFrame({
    "latitude": [-35.310, -35.273],
    "longitude": [149.125, 149.133],
    "species": ["Callocephalon fimbriatum", "Eolophus roseicapilla"],
    "location_id": ["ARD001", "ARD001"]
})

# Standardise: each corella.set_*() call takes the DataFrame first and maps
# existing columns (or constant values) onto Darwin Core terms. The result is
# reassigned to my_data so the steps build on one another.
my_data = corella.set_occurrences(
    my_data,
    # Build occurrenceID from the site code plus a sequential number,
    # with the sequential number placed first.
    occurrenceID=True,
    composite_id="location_id",
    sequential_id=True,
    add_sequential_id='first',
    basisOfRecord="HumanObservation"
)
my_data = corella.set_coordinates(
    my_data,
    decimalLatitude="latitude",
    decimalLongitude="longitude"
)
my_data = corella.set_locality(
    my_data,
    country="Australia",
    locality="Canberra"
)
my_data = corella.set_scientific_name(
    my_data,
    scientificName="species",
    taxonRank="species"
)
my_data = corella.set_taxonomy(
    my_data,
    kingdom="Animalia",
    # NOTE(review): Aves is a taxonomic class (the phylum for birds is
    # Chordata); value kept as-is so it matches the printed output.
    phylum="Aves"
)
my_data
occurrenceID decimalLatitude decimalLongitude ... locality kingdom phylum
0 0-ARD001 -35.310 149.125 ... Canberra Animalia Aves
1 1-ARD001 -35.273 149.133 ... Canberra Animalia Aves
[2 rows x 10 columns]