Commit 5cf35357 authored by Brage.Forland's avatar Brage.Forland

Remove dependency on global variables in artsappsdm.R.

Species cache no organised by speciesKey rather than taxonKey so that synonym species are cached in same file.
parent 2d690dff
......@@ -201,10 +201,11 @@ downloadOccupancyData <- function(speciesIDs, outFolder = paste(tempdir(), "occD
occ_download_meta(downloadID)$status == "RUNNING" ||
occ_download_meta(downloadID)$status == "PREPARING"
)) {
cat(".")
Sys.sleep(min(inMaxGBIFTime - totalTime, inAttemptGBIFTime))
totalTime <- totalTime + inAttemptGBIFTime
}
cat("\n")
if(!(occ_download_meta(downloadID)$status %in% c("SUCCEEDED", "SUCCESS"))) {
stop("unable to acquire occurrence data from GBIF")
}
......
......@@ -15,19 +15,23 @@ gbifResolveNames <- function(species, nameCacheFile=NULL, useCache=TRUE, keepUnr
for(sp in speciesMissing) {
gbifName <- rgbif::name_backbone(sp, strict=TRUE, rank="SPECIES")
taxonKey=NA
resolvedName=NA
taxonKey <- NA
resolvedName <- NA
canonicalName<- sp
if(gbifName$matchType == "EXACT") {
if(gbifName$status=="SYNONYM") {
taxonKey <- gbifName$acceptedUsageKey
} else {
taxonKey <- gbifName$usageKey
}
resolvedName=gbifName$species
resolvedName <- gbifName$species
canonicalName <- gbifName$canonicalName
}
speciesName <-sp
speciesName <- sp
# We want the dirname to be a valid R variable name
dirName <- iconv(sp, from="UTF-8", to="ASCII//TRANSLIT")
dirName <- iconv(canonicalName, from="UTF-8", to="ASCII//TRANSLIT")
dirName <- gsub("-|\\s+","_",dirName)
dirName <- make.names(dirName)
......@@ -36,7 +40,7 @@ gbifResolveNames <- function(species, nameCacheFile=NULL, useCache=TRUE, keepUnr
resolvedName=as.character(resolvedName),
taxonKey=as.character(taxonKey),
dirName=as.character(dirName))
}
}
if(!is.null(nameCacheFile)){
tryCatch(
......
......@@ -5,7 +5,8 @@
writeOccurrences <- function(occDataRaw, cacheDir) {
speciesList = unique(occDataRaw$taxonKey)
# speciesList = unique(occDataRaw$taxonKey)
speciesList = unique(occDataRaw$speciesKey)
if(!dir.exists(cacheDir)) {
dir.create(cacheDir, recursive = TRUE)
......@@ -16,7 +17,17 @@ writeOccurrences <- function(occDataRaw, cacheDir) {
for(taxonID in speciesList) {
taxonCacheFile <- paste(cacheDir,"/",taxonID,".csv.gz",sep="")
speciesOccTMP <- occDataRaw[occDataRaw$taxonKey == taxonID, ][c("taxonKey", "decimalLongitude", "decimalLatitude", "species", "family","year","gbifID","speciesKey","lifeStage","institutionCode" )]
#speciesOccTMP <- occDataRaw[occDataRaw$taxonKey == taxonID, ][c("taxonKey", "decimalLongitude", "decimalLatitude", "species", "family","year","gbifID","speciesKey","lifeStage","institutionCode" )]
speciesOccTMP <- occDataRaw[occDataRaw$speciesKey == taxonID, ][c("taxonKey",
"decimalLongitude",
"decimalLatitude",
"species",
"family",
"year",
"gbifID",
"speciesKey",
"lifeStage",
"institutionCode" )]
write_csv(speciesOccTMP, gzfile(taxonCacheFile))
}
......@@ -64,8 +75,14 @@ loadOccurences <- function(taxonIDs, cacheDir=NULL, maxCacheDays = 365) {
taxonCacheFile <- paste(cacheDir,"/",taxonID,".csv.gz",sep="")
if(file.exists(taxonCacheFile) &&
( file.info(taxonCacheFile)$mtime - Sys.time() ) < maxCacheDays * 86400 ) {
df <- read_csv(gzfile(taxonCacheFile), col_types=cols(taxonKey=col_integer(),decimalLongitude=col_double(), decimalLatitude=col_double()))
species.df <- rbind(species.df, cbind(df$taxonKey, df$decimalLongitude, df$decimalLatitude))
df <- read_csv(gzfile(taxonCacheFile),
col_types=cols(taxonKey=col_integer(),
decimalLongitude=col_double(),
decimalLatitude=col_double()))
species.df <- rbind(species.df,
cbind(df$speciesKey, # df$taxonKey,
df$decimalLongitude,
df$decimalLatitude))
taxaFound = c(taxaFound, taxonID)
} else {
taxaMissing = c(taxaMissing, taxonID)
......
......@@ -4,30 +4,36 @@
## Create gridded occrurrences from point data and write gridded and points used in grid to the
## per taxon directory.
##
occurrencesToGrid <- function(occurrences, gridTemplate, speciesInfo, outputLocation = NULL, saveRaster = FALSE, savePoints = FALSE) {
occurrencesToGrid <- function(occurrences,
gridTemplate,
speciesInfo,
outputLocation = NULL,
saveRaster = FALSE,
savePoints = FALSE) {
if((saveRaster ||savePoints) && is.null(outputLocation)) {
warning("outputLocation is not set, ignoring saveRaster and savePoint flags")
saveRaster=FALSE
savePoints=FALSE
}
# Points within template
occPoints <- occurrences[!is.na(raster::extract(gridTemplate, occurrences)),]
taxaFound <- unique(occPoints$taxonKey)
rasterLayers<- lapply(X=taxaFound, FUN=function(taxonID) {
# taxonOcc <- dplyr::filter(occPoints, taxonKey == taxonID)
taxonOcc <- dplyr::filter(occPoints, taxonKey == taxonID)
taxonLayer <- raster::rasterize(taxonOcc,
gridTemplate * 0,
fun='count', field='taxonKey', update=TRUE)
taxonLayer <- raster::mask(taxonLayer, gridTemplate)
curTaxonInfo <- speciesInfo[speciesInfo$taxonKey==taxonID,"dirName"]
dirName <- paste(outputLocation,curTaxonInfo[["dirName"]],sep="/")
if(!dir.exists(dirName)) dir.create(dirName,recursive=TRUE)
......
......@@ -8,11 +8,11 @@
writeRasterStats <- function(dirs, includeFiles='*.tif', outputFile="geotiffstats.tsv") {
lapply(dirs, function(dir) {
message(paste("Writing raster statistics for", dir))
files <- list.files(dir,pattern=glob2rx(includeFiles), recursive=TRUE)
rasterStats <- lapply(files, function(rasterFile) {
rastervals=values(raster::raster(paste(dir,rasterFile,sep="/")))
return( data.frame(
file=rasterFile,
mean=mean(rastervals, na.rm=TRUE),
......
......@@ -20,8 +20,10 @@ WORK_LOC = paste(ARTSAPP_HOME, "tmp", sep="/")
MESH_BOUDARY_LOC = paste(ARTSAPP_HOME, "data/norge/GADMNorway", sep="/")
MESH_BOUNDARY_LAYER = "NOR_adm0"
WORLDCLIM_LOC = paste(ARTSAPP_HOME, "data/norge/WORLDCLIMData", sep="/")
INLA_NUM_THREADS = 24
##
##
source(paste(ARTSAPP_HOME, "R/downloadOccupancyData.R", sep="/"))
source(paste(ARTSAPP_HOME, "R/writeDistributionModelOutput.R", sep="/"))
source(paste(ARTSAPP_HOME, "R/gbifResolveNames.R", sep="/"))
......@@ -60,7 +62,6 @@ opt_parser = OptionParser(
help="Write basic stats for the geotiff files (sum, mean, sd)"),
make_option("--saveagg", type="logical", action="store_true", default=FALSE,
help="Save aggregated probability maps for MeanEst (2x and 4x))")
),
usage= "%prog [options] file"
......@@ -92,68 +93,94 @@ if(length(jobfile)!=1) {
##
## Read job configuration file specifying the species, resolution and output sub directory.
##
if(!file.exists(jobfile)){
stop("Cannot find job configuration file ",jobfile)
}
jobconfig <- read_json(jobfile, simplifyVector=TRUE)
if(is.null(jobconfig$outputLocation) || !length(jobconfig$outputLocation)>0) {
stop("No outputLocation found i in jobfile")
} else {
outputSubLocation <- jobconfig$outputLocation
}
if(is.null(jobconfig$species) || !length(jobconfig$species)>0) {
stop("No species found i in jobfile")
} else {
speciesList <- jobconfig$species
}
if(is.null(jobconfig$species) || !length(jobconfig$species)>0) {
warning("No spatial resolution in jobfile, resolution set to 5m")
spatialres <- "5m"
} else {
spatialres <- jobconfig$resolution
}
setupJob <- function(jobfile) {
##
## Create output directory/directories for job
##
for(outputLoc in rev(outputSubLocation)) {
outputLocation = paste(OUTPUT_LOC, outputLoc, sep="/")
if(!file.exists(jobfile)){
stop("Cannot find job configuration file ",jobfile)
}
jobconfig <- read_json(jobfile, simplifyVector=TRUE)
if(is.null(jobconfig$outputLocation) || !length(jobconfig$outputLocation)>0) {
stop("No outputLocation found i in jobfile")
} else {
outputSubLocation <- jobconfig[["outputLocation"]]
}
if(is.null(jobconfig$species) || !length(jobconfig$species)>0) {
stop("No species found i in jobfile")
} else {
speciesNames <- jobconfig$species
}
if(is.null(jobconfig$species) || !length(jobconfig$species)>0) {
warning("No spatial resolution in jobfile, resolution set to 5m")
spatialres <- "5m"
} else {
spatialres <- jobconfig$resolution
}
##
## Create output directory/directories for job
##
outputLocation = paste(OUTPUT_LOC, outputSubLocation, sep="/")
if(!dir.exists(outputLocation)) {
stopifnot(dir.create(outputLocation, recursive=TRUE))
}
}
## Resolve gbif ids and create name cache file "specieslist.tsv"
nameCacheFile <- paste(outputLocation, "specieslist.tsv", sep="/")
speciesTable <- gbifResolveNames(speciesNames, nameCacheFile)
taxonKeys <- speciesTable[!is.na(speciesTable$taxonKey),"taxonKey"]
cacheStatus <- getDownloadCacheStatus(taxonKeys, CACHE_LOC)
## Add downloaded and done attribute to indicate current status for taxa
speciesTable$downloaded <- FALSE
speciesTable[speciesTable$taxonKey %in% cacheStatus$available,"downloaded"]=TRUE
## Resolve gbif ids
nameCacheFile <- paste(outputLocation, "specieslist.tsv", sep="/")
## Use the presense of a MeanEst file check if model is already run.
outputFiles <- paste(outputLocation, speciesTable$dirName, 'MeanEst.tif', sep="/")
speciesTable$done <- file.exists(outputFiles)
speciesInfo <- gbifResolveNames(speciesList, nameCacheFile)
taxonKeys <- speciesInfo[!is.na(speciesInfo$taxonKey),"taxonKey"]
cacheStatus <- getDownloadCacheStatus(taxonKeys, CACHE_LOC)
## Location of covariate files
covariateDir <- paste(WORLDCLIM_LOC, spatialres, sep="/")
## Template for spation extent (resolution does not really matter)
extentGridFile <- paste(covariateDir, "template.tif", sep="/")
## Work directory for downloads
tempDirectory <- paste(WORK_LOC, outputSubLocation, sep="/")
return(
list(speciesTable = speciesTable, #(speciesName,resolvedName,taxonKey,dirName,downloaded,done)
tempLocation = tempDirectory, # Work directory for downloads
covariateLocation = covariateDir, # Covariate directory
extendGridTemplate = extentGridFile, # Geotiff file used as spatial extent
outputLocation = outputLocation # Output location for jos
)
)
}
jobconfig <- setupJob(jobfile)
## Add downloaded and done attribute to indicate current status for taxa
speciesInfo$downloaded <- FALSE
speciesInfo[speciesInfo$taxonKey %in% cacheStatus$available,"downloaded"]=TRUE
outputFiles <- paste(outputLocation, speciesInfo$dirName, 'MeanEst.tif', sep="/")
speciesInfo$done <- file.exists(outputFiles)
# print(jobconfig)
##
##
##
runStatus <- function(speciesInfo) {
cat(readr::format_tsv(speciesInfo))
runStatus <- function(speciesTable) {
cat(readr::format_tsv(speciesTable))
}
runDownload <- function() {
runDownload <- function(jobconfig) {
# outputSubLoc <- "OccurrenceData"
# GBIF login credentials
# gbifLoginCredentials <- list(
# username = "", # The GBIF username
# password = "", # The GBIF password
# email = "" # The GBIF email
# )
# gbifLoginCredentials <- list(
# username = "", # The GBIF username
# password = "", # The GBIF password
# email = "" # The GBIF email
# )
# Maximum time (in seconds) to wait before failure when attempting to download occurrence data
gbifMaxTime <- 14400
# Time (in seconds) to wait between checks to see if GBIF data is ready to download
......@@ -162,28 +189,35 @@ runDownload <- function() {
yearLimits <- c(1960, Inf)
## Only used for rectangular extent, resolution unimportant
extentGridFile <- paste(WORLDCLIM_LOC, "5m/template.tif", sep="/")
extentGridFile <- jobconfig[["extendGridTemplate"]]
message("Reading extent from ", extentGridFile)
extentGrid <- readGDAL(extentGridFile)
tempDirectory <- paste(WORK_LOC, outputSubLocation, sep="/")
taxaMissing = cacheStatus$missing
taxaFound = cacheStatus$available
tempDirectory <- jobconfig[["tempLocation"]]
speciesTable <- jobconfig[["speciesTable"]]
missing <- speciesTable["downloaded"]== FALSE
taxaMissing <- speciesTable[missing,]$taxonKey
taxaFound <- speciesTable[!missing,]$taxonKey
message("Occurrence cache: ", length(taxaFound), " available, ", length(taxaMissing)," missing")
if(downloadMode == 'all') {
taxaDownload = c(cacheStatus$missing, cacheStatus$found)
taxaDownload = speciesTable$taxonKey
} else {
taxaDownload = cacheStatus$missing
taxaDownload = speciesTable[missing,]$taxonKey
}
if(!is.null(taxaDownload) && length(taxaDownload) > 0) {
print(taxaMissing)
print(tempDirectory)
print(yearLimits)
message("Downloading GBIF occurrences for ",length(taxaMissing)," taxa.")
speciesOccRaw <- downloadOccupancyData(taxaMissing,
tempDirectory,
yearLimits,
GBIFAttemptTim = 60,
templateGrid=extentGrid)
writeOccurrences(speciesOccRaw,CACHE_LOC)
......@@ -194,30 +228,37 @@ runDownload <- function() {
##
## Save gridded and raw occurrences to output directories.
##
runSaveOcc <- function() {
runSaveOcc <- function(jobconfig) {
covariateDir <- paste(WORLDCLIM_LOC, spatialres, sep="/")
gridTemplateFile <- paste(covariateDir, "template.tif", sep="/")
print(gridTemplateFile)
speciesTable <- jobconfig[["speciesTable"]]
gridTemplateFile <- jobconfig[["extendGridTemplate"]]
outputLocation <- jobconfig[["outputLocation"]]
if(!file.exists(gridTemplateFile)) {
error("Unable to open raster template", gridTemplateFile);
}
gridTemplate <- raster(gridTemplateFile)
occurrences <- loadOccurences(speciesInfo$taxonKey, CACHE_LOC)$occurrences
occurrencesToGrid(occurrences, gridTemplate, speciesInfo, outputLocation = outputLocation, saveRaster = TRUE, savePoints = TRUE)
occurrences <- loadOccurences(speciesTable$taxonKey, CACHE_LOC)$occurrences
griddedOcc <- occurrencesToGrid(occurrences,
gridTemplate,
speciesTable,
outputLocation = outputLocation,
saveRaster = TRUE,
savePoints = TRUE)
}
runSaveModelPlots <- function() {
runSaveModelPlots <- function(jobconfig) {
covariateLocation <- jobconfig[["covariateLocation"]]
speciesTable <- jobconfig[["speciesTable"]]
outputLocation<- jobconfig[["outputLocation"]]
covariateDir <- paste(WORLDCLIM_LOC,spatialres, sep="/")
climateData <- loadCovariates(covariateDir)
res <- writeDistributionModelOutput(outputLocation, speciesInfo, climateData)
climateData <- loadCovariates(covariateLocation)
res <- writeDistributionModelOutput(outputLocation, speciesTable, climateData)
}
......@@ -242,7 +283,7 @@ runSaveRasterAggregated <- function(outputLocation) {
##
## Run spatial GLM on all
##
runINLAsdm <- function() {
runINLAsdm <- function(jobconfig) {
## INLA Options
spdeAlpha<- 1.5
......@@ -253,18 +294,18 @@ runINLAsdm <- function() {
)
responseDensity <- 100
covariateDir <- paste(WORLDCLIM_LOC,spatialres, sep="/")
# Read raster with resolution and model extent
gridTemplate <- raster(paste(covariateDir, "template.tif", sep="/"))
speciesTable <- jobconfig[["speciesTable"]]
outputLocation <- jobconfig[["outputLocation"]]
# Read raster with resolution and model extent
gridTemplate <- raster(jobconfig[["extendGridTemplate"]])
# Read the covariate raster files
climateData <- loadCovariates(covariateDir)
climateData <- loadCovariates(jobconfig[["covariateLocation"]])
if(runmodelMode == 'missing') {
modRunTaxa <- speciesInfo[speciesInfo$done == FALSE, ]$taxonKey
modRunTaxa <- speciesTable[speciesTable$done == FALSE, ]$taxonKey
} else {
modRunTaxa <- speciesInfo$taxonKey
modRunTaxa <- speciesTable$taxonKey
}
if(options$maxtaxa > 0 && length(modRunTaxa > options$maxtaxa )) {
......@@ -273,7 +314,7 @@ runINLAsdm <- function() {
occurrencesRaw <- loadOccurences(modRunTaxa, CACHE_LOC)
# Occurrence points to grid
occurrencesGridded <- occurrencesToGrid(occurrencesRaw$occurrences, gridTemplate, speciesInfo,
occurrencesGridded <- occurrencesToGrid(occurrencesRaw$occurrences, gridTemplate, speciesTable,
outputLocation = outputLocation, saveRaster = FALSE, savePoints = FALSE)
occurrenceData <- occurrencesGridded$raster
......@@ -295,7 +336,7 @@ runINLAsdm <- function() {
#
# inOccurrencePoints <- as(occurrences.d$geom, "Spatial" )
#
# output <- writeDistributionModelOutput(res, outputLocation, speciesInfo,inOccurrencePoints, inClimateData )
# output <- writeDistributionModelOutput(res, outputLocation, speciesTable,inOccurrencePoints, inClimateData )
}
......@@ -312,7 +353,7 @@ runINLAsdm <- function() {
if(unlist(options$status)==TRUE) {
runStatus(speciesInfo)
runStatus(jobconfig[["speciesTable"]])
quit()
}
......@@ -327,29 +368,29 @@ library(rgdal)
##
downloadMode <- unlist(options$download)
if(downloadMode != 'none') {
runDownload()
runDownload(jobconfig)
}
saveoccMode <- unlist(options$saveocc)
if(saveoccMode==TRUE) {
runSaveOcc()
runSaveOcc(jobconfig)
}
runmodelMode <- unlist(options$runmodel)
if(runmodelMode != 'none') {
library(INLA)
inla.setOption("num.threads", 24)
runINLAsdm()
inla.setOption("num.threads", INLA_NUM_THREADS )
runINLAsdm(jobconfig)
}
saverasterstatsMode <- unlist(options$saverasterstats)
if(saverasterstatsMode==TRUE) {
runSaveRasterStats(outputLocation)
runSaveRasterStats(jobconfig[["outputLocation"]])
}
saverasteraggMode <- unlist(options$saveagg)
if(saverasteraggMode==TRUE) {
runSaveRasterAggregated(outputLocation)
runSaveRasterAggregated(jobconfig[["outputLocation"]])
}
......@@ -358,12 +399,6 @@ if(unlist(options$saveplots)==TRUE) {
library(ggplot2)
library(ggthemes)
library(grid)
runSaveModelPlots()
runSaveModelPlots(jobconfig)
}
##
##
##
##
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment