Raster parallel process in R

Raster parallel process in R

By Fidel Maureira Sotomayor

Install libraries if there are no already in your computer.

 1packages = c('raster','terra', 'snow', 'parallel', 'foreach','doParallel','rasterVis', 'mapview','ggplot2', 'lubridate','pryr')
 2out = sapply(packages ,  function(p){
 3  print(p)
 4  if(require(p,character.only = TRUE)==FALSE) {
 5    install.packages(p)}
 6   
 7    require(p,character.only = TRUE)  })
 8
 9
10
11my_function = function(r, na.rm=TRUE, fun=mean) {
12  fun(getValues(r), na.rm=na.rm)
13}

Download files, alternative is possible use the function raster::getData. More details on https://worldclim.org/data/worldclim21.html

1temp <- tempfile()
2tempd <- tempdir()
3#download worldclim climate data
4download.file('https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_10m_tavg.zip',temp, mode="wb")
5#raster::getData()
6unzip(temp, exdir=tempd)
7#dir(tempd) # explore the temp directory

Explore the data

1file_names = list.files(tempd,'wc2.1_10m_tavg')
2tmean <- stack(file.path(tempd, file_names))
3mapview(tmean[[7]])
## Warning in rasterCheckSize(x, maxpixels = maxpixels): maximum number of pixels for Raster* viewing is 5e+05 ; 
## the supplied Raster* has 2332800 
##  ... decreasing Raster* resolution to 5e+05 pixels
##  to view full resolution set 'maxpixels =  2332800 '

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded ellps WGS 84 in Proj4 definition: +proj=merc +a=6378137
## +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null
## +wktext +no_defs +type=crs

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum World Geodetic System 1984 in Proj4 definition

## Warning in rgdal::rawTransform(projfrom, projto, nrow(xy), xy[, 1], xy[, : 104
## projected point(s) not finite

## Warning in rgdal::rawTransform(projfrom, projto, nrow(xy), xy[, 1], xy[, : 104
## projected point(s) not finite

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded ellps WGS 84 in Proj4 definition: +proj=merc +a=6378137
## +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null
## +wktext +no_defs +type=crs

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum World Geodetic System 1984 in Proj4 definition

Paraleling

Create cluster and load functions. Alternative you can create your own scrip file with function and just load this file.

1cl <- makeCluster(4, type='SOCK', outfile='log.txt')
2registerDoParallel(cl)
3out=clusterEvalQ(cl,library(raster))
4env= where('my_function')
5clusterExport(cl, list('my_function'),envir = env)
6#out =clusterEvalQ(cl, source('my_functions.R'))

We going to use two way of paralleling in R with the snow/parallel and foreach libraries.

1system.time({
2  clusterExport(cl,list('tmean'))
3  mean_planet = parLapply(cl,c(1:12), function(l){
4    my_function(tmean[[l]])
5  })
6  mean_planet = unlist(mean_planet)
7})
##    user  system elapsed 
##    0.00    0.00    1.51
1system.time({
2  mean_planet2 = foreach(l = c(1:12),
3                         .combine = rbind,
4                         .errorhandling = 'remove') %dopar%
5    my_function(tmean[[l]])
6}) 
##    user  system elapsed 
##    0.04    0.02    1.37
1results = data.frame(layer_name = names(tmean),
2                     month =  month(1:12,label=TRUE, abbr = TRUE),
3                     parLapply_out =mean_planet,
4                     foreach_out = mean_planet2)

The first plot show the global mean temperature for each month. The second figure show that the two paralleling outcome the same results

1ggplot(results,aes(x=month,y=parLapply_out, col='parLapply_out'))+
2  geom_point()+labs(y='Mean temperature (°C)', col='', x='')+
3  geom_point(aes(y=foreach_out,col = 'foreach_out'))+theme_minimal()

Split large raster

We download the raster and make a split overlapped extent. The overlapped extent is only for the use of focus function, any other process should use a non-overlapped extent! Also is possible use the function focus in raster. but it is slower function.

1file_raster = 'https://www.dropbox.com/s/onp3f75pab7z3j9/intro-1508529851.jpg?dl=1'
2
3r_raster = stack(file_raster)
4e =extent(r_raster)
5extent_r =list(r1 =extent(0, floor(e@xmax/2)+2, 0, floor(e@ymax/2)+2),
6               r2 = extent(ceiling (e@xmax/2), e@xmax, floor (e@ymax/2), e@ymax),
7               r3 =  extent(0, floor (e@xmax/2)+2,  floor (e@ymax/2), e@ymax),
8               r4 =  extent(ceiling (e@xmax/2), e@xmax,  0, floor (e@ymax/2)+2))

We call the parLapply from a lapply environment, so we need export elements from this environment (env).

 1tmean_smooth = lapply(c(1:3) , function(l){
 2  env= where('l')
 3  clusterExport(cl, list('extent_r','r_raster','l'),envir=env)
 4  r_parts = parLapply(cl,seq_along(extent_r),function(part){
 5    r = r_raster[[l]]
 6    r_part <- crop(r, extent_r[[part]])
 7    #focal(r_part,w=matrix(1,3,3), fun='mean',na.rm=TRUE)
 8    terra::focal(r_part,w=matrix(1,3,3), fun='mean',na.rm=TRUE)
 9    #r_part
10  })
11  
12  m1 <- mosaic(r_parts[[1]], r_parts[[2]],r_parts[[3]],r_parts[[4]], fun=mean)
13  
14  m1
15})
16stopCluster(cl)
17#r_smooth = stack(tmean_smooth[[1]],tmean_smooth[[2]],tmean_smooth[[3]])
18r_smooth = do.call(stack,tmean_smooth)
19
20plotRGB(r_smooth)
21abline(v=390,h=219,col='green')

Versus original

1plotRGB(r_raster)

##Visualization

more info in https://oscarperpinan.github.io/rastervis/

1data(volcano)
2volcano = raster(volcano)
3library(viridis)
## Loading required package: viridisLite
1rasterVis::gplot(volcano) + 
2  geom_tile(aes(fill = (value)))  +
3  viridis::scale_fill_viridis(direction = -1, 
4                              na.value='#FFFFFF00') +
5  labs(fill='Altitud (m) ', x='E',y='N')+
6  theme_bw()

1levelplot(volcano,contour=TRUE)

1xyplot(wc2.1_10m_tavg_03~wc2.1_10m_tavg_07|cut(x, 4), data = tmean, auto.key = list(space='right'))