Raster parallel process in R

Raster parallel process in R

By Fidel Maureira Sotomayor

Install libraries if there are no already in your computer.

 1packages = c('raster','terra', 'snow', 'parallel', 'foreach','doParallel','rasterVis', 'mapview','ggplot2', 'lubridate','pryr')
 2out = sapply(packages ,  function(p){
 3  print(p)
 4  if(require(p,character.only = TRUE)==FALSE) {
 5    install.packages(p)}
 6   
 7    require(p,character.only = TRUE)  })
 8
 9
10
11my_function = function(r, na.rm=TRUE, fun=mean) {
12  fun(getValues(r), na.rm=na.rm)
13}
r

Download files, alternative is possible use the function raster::getData. More details on https://worldclim.org/data/worldclim21.html

1temp <- tempfile()
2tempd <- tempdir()
3#download worldclim climate data
4download.file('https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_10m_tavg.zip',temp, mode="wb")
5#raster::getData()
6unzip(temp, exdir=tempd)
7#dir(tempd) # explore the temp directory
r

Explore the data

1file_names = list.files(tempd,'wc2.1_10m_tavg')
2tmean <- stack(file.path(tempd, file_names))
3mapview(tmean[[7]])
r
## Warning in rasterCheckSize(x, maxpixels = maxpixels): maximum number of pixels for Raster* viewing is 5e+05 ; 
## the supplied Raster* has 2332800 
##  ... decreasing Raster* resolution to 5e+05 pixels
##  to view full resolution set 'maxpixels =  2332800 '

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded ellps WGS 84 in Proj4 definition: +proj=merc +a=6378137
## +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null
## +wktext +no_defs +type=crs

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum World Geodetic System 1984 in Proj4 definition

## Warning in rgdal::rawTransform(projfrom, projto, nrow(xy), xy[, 1], xy[, : 104
## projected point(s) not finite

## Warning in rgdal::rawTransform(projfrom, projto, nrow(xy), xy[, 1], xy[, : 104
## projected point(s) not finite

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded ellps WGS 84 in Proj4 definition: +proj=merc +a=6378137
## +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null
## +wktext +no_defs +type=crs

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum World Geodetic System 1984 in Proj4 definition
tmean[[7]]
-60-40-20020

NA
5000 km
3000 mi
Leaflet | © OpenStreetMap contributors © CARTO

Create cluster and load functions. Alternative you can create your own scrip file with function and just load this file.

1cl <- makeCluster(4, type='SOCK', outfile='log.txt')
2registerDoParallel(cl)
3out=clusterEvalQ(cl,library(raster))
4env= where('my_function')
5clusterExport(cl, list('my_function'),envir = env)
6#out =clusterEvalQ(cl, source('my_functions.R'))
r

We going to use two way of paralleling in R with the snow/parallel and foreach libraries.

1system.time({
2  clusterExport(cl,list('tmean'))
3  mean_planet = parLapply(cl,c(1:12), function(l){
4    my_function(tmean[[l]])
5  })
6  mean_planet = unlist(mean_planet)
7})
r
##    user  system elapsed 
##    0.00    0.00    1.51
1system.time({
2  mean_planet2 = foreach(l = c(1:12),
3                         .combine = rbind,
4                         .errorhandling = 'remove') %dopar%
5    my_function(tmean[[l]])
6}) 
r
##    user  system elapsed 
##    0.04    0.02    1.37
1results = data.frame(layer_name = names(tmean),
2                     month =  month(1:12,label=TRUE, abbr = TRUE),
3                     parLapply_out =mean_planet,
4                     foreach_out = mean_planet2)
r

The first plot show the global mean temperature for each month. The second figure show that the two paralleling outcome the same results

1ggplot(results,aes(x=month,y=parLapply_out, col='parLapply_out'))+
2  geom_point()+labs(y='Mean temperature (°C)', col='', x='')+
3  geom_point(aes(y=foreach_out,col = 'foreach_out'))+theme_minimal()
r

We download the raster and make a split overlapped extent. The overlapped extent is only for the use of focus function, any other process should use a non-overlapped extent! Also is possible use the function focus in raster. but it is slower function.

1file_raster = 'https://www.dropbox.com/s/onp3f75pab7z3j9/intro-1508529851.jpg?dl=1'
2
3r_raster = stack(file_raster)
4e =extent(r_raster)
5extent_r =list(r1 =extent(0, floor(e@xmax/2)+2, 0, floor(e@ymax/2)+2),
6               r2 = extent(ceiling (e@xmax/2), e@xmax, floor (e@ymax/2), e@ymax),
7               r3 =  extent(0, floor (e@xmax/2)+2,  floor (e@ymax/2), e@ymax),
8               r4 =  extent(ceiling (e@xmax/2), e@xmax,  0, floor (e@ymax/2)+2))
r

We call the parLapply from a lapply environment, so we need export elements from this environment (env).

 1tmean_smooth = lapply(c(1:3) , function(l){
 2  env= where('l')
 3  clusterExport(cl, list('extent_r','r_raster','l'),envir=env)
 4  r_parts = parLapply(cl,seq_along(extent_r),function(part){
 5    r = r_raster[[l]]
 6    r_part <- crop(r, extent_r[[part]])
 7    #focal(r_part,w=matrix(1,3,3), fun='mean',na.rm=TRUE)
 8    terra::focal(r_part,w=matrix(1,3,3), fun='mean',na.rm=TRUE)
 9    #r_part
10  })
11  
12  m1 <- mosaic(r_parts[[1]], r_parts[[2]],r_parts[[3]],r_parts[[4]], fun=mean)
13  
14  m1
15})
16stopCluster(cl)
17#r_smooth = stack(tmean_smooth[[1]],tmean_smooth[[2]],tmean_smooth[[3]])
18r_smooth = do.call(stack,tmean_smooth)
19
20plotRGB(r_smooth)
21abline(v=390,h=219,col='green')
r

Versus original

1plotRGB(r_raster)
r

##Visualization

more info in https://oscarperpinan.github.io/rastervis/

1data(volcano)
2volcano = raster(volcano)
3library(viridis)
r
## Loading required package: viridisLite
1rasterVis::gplot(volcano) + 
2  geom_tile(aes(fill = (value)))  +
3  viridis::scale_fill_viridis(direction = -1, 
4                              na.value='#FFFFFF00') +
5  labs(fill='Altitud (m) ', x='E',y='N')+
6  theme_bw()
r

1levelplot(volcano,contour=TRUE)
r

1xyplot(wc2.1_10m_tavg_03~wc2.1_10m_tavg_07|cut(x, 4), data = tmean, auto.key = list(space='right'))
r