I run a loop over the elements of the list grouped_data_list using foreach with %dopar%.
The loop runs terribly slowly, even though the workers are visibly busy.
If I instead write a vectorized routine with lapply, without any parallelization, it takes only seconds. What is wrong with my %dopar% loop?
library(data.table)
library('doParallel') # parallel cpu implementation
library('foreach') # parallel looping
# Small reproducible sample of the input data: nine rows with the columns
# Who, DocumentExtension, What_Action, Date and Count.
grouped_data_dt <- data.table(
  Who = c(
    "thdeg", "mjg", "dfdf", "system", "df", "system", "system", "hegha", "ydvw"
  ),
  DocumentExtension = c(
    "jpg", "com", "dug", "182", "27", "pdf", "png", "xslt", "53"
  ),
  What_Action = c(
    "added", "removed", "added", "added", "added", "removed", "added", "added",
    "added"
  ),
  Date = as.Date(c(
    "2017-11-08", "2017-10-10", "2017-09-14", "2017-09-20", "2017-09-21",
    "2017-10-20", "2017-10-19", "2017-08-24", "2017-09-17"
  )),
  Count = c(1, 2, 3, 4, 5, 6, 7, 8, 9)
)
# Reporting calendar: every day from 2017-08-23 through 2017-11-27 inclusive
# (97 consecutive dates). Generated with seq() rather than typed out by hand,
# so the range cannot silently contain a gap or a typo.
reported_date_seq_dt <- data.table(
  reported_date_seq = seq(
    from = as.Date("2017-08-23"),
    to = as.Date("2017-11-27"),
    by = "day"
  )
)
# Split the table into a list holding one data.table per
# (Who, DocumentExtension, What_Action) combination. keep.by = TRUE leaves the
# grouping columns inside each piece, which the later fill step relies on.
# TRUE is spelled out because the T shorthand is an ordinary variable that can
# be reassigned.
grouped_data_list <- split(
  x = grouped_data_dt,
  by = c("Who", "DocumentExtension", "What_Action"),
  drop = TRUE,
  sorted = TRUE,
  keep.by = TRUE
)
 # Slow variant under discussion: start a 4-worker cluster, register it as the
 # %dopar% backend, and process each group by its index in grouped_data_list.
 cl <- makeCluster(4)
 registerDoParallel(cl)
 ## replace NA with zeros in the timeseries
 grouped_data_list_2 <- list()
 # NOTE(review): the value returned by foreach() is not assigned to anything,
 # so whatever the workers compute is not stored in grouped_data_list_2.
 foreach(
      i = 1:length(grouped_data_list)
         ) %dopar%
 {
      # NOTE(review): the body indexes the full list, so the whole
      # grouped_data_list has to be made available to the workers rather than
      # just the one group a task needs -- presumably the cause of the
      # slowness; iterating over the elements themselves would avoid it.
      x <- grouped_data_list[[i]]
      # Key (and sort) this group by Date so the join below matches on Date.
      data.table::setkey(x, Date)
      # Values of the non-date, non-count columns taken from the first row,
      # flattened into one vector.
      dt_params <- unlist(
           x[1, -c('Date', 'Count'), with = F]
           )
      # Join onto the full date sequence: one row per reported date; dates
      # with no match in x come back with NA in the remaining columns.
      y <- x[reported_date_seq_dt]
      # Fill the non-date, non-count columns of the unmatched rows with the
      # values captured in dt_params.
      y[is.na(Count), (colnames(y)[!colnames(y) %in% c('Date', 'Count')]) := lapply(1:length(dt_params), function(x) dt_params[x])]
      # Rows without a count get a count of zero.
      y[is.na(Count), Count := 0]
      # NOTE(review): this assignment is evaluated on a worker, so it changes
      # that worker's copy only; the grouped_data_list_2 created above in the
      # calling session is not updated by it.
      grouped_data_list_2 <- c(grouped_data_list_2
                               , list(y)
      )
 }
 # Shut the worker processes down.
 stopCluster(cl)
The sequential lapply routine, for comparison:
## after grouped_data_list is created
 # No rm() before redefining: the assignment below overwrites any earlier
 # definition, and rm() on an object that does not exist yet raises a warning.

 #' Fill the gaps in one group's daily time series
 #'
 #' Joins one group onto a full date sequence so that every reported date has
 #' a row. Rows for dates missing from the group get the group's identifying
 #' values (taken from its first row) and a Count of 0.
 #'
 #' @param x A data.table for one group with a `Date` column, a `Count` column
 #'   and any number of further identifying columns. It is keyed by `Date` by
 #'   reference, i.e. the caller's object is sorted and keyed as a side effect.
 #' @param date_seq_dt A data.table whose first column holds the dates to
 #'   report on. Defaults to the script-level `reported_date_seq_dt`.
 #' @return A data.table with one row per date in `date_seq_dt`.
 group_replace_func <- function(x, date_seq_dt = reported_date_seq_dt)
 {
      setkey(x, Date)
      # Identifying columns are everything except the date and the count.
      fill_cols <- setdiff(colnames(x), c("Date", "Count"))
      # Take each first-row value per column so its type is preserved;
      # unlist() on the row would coerce all values to one common type.
      fill_values <- lapply(fill_cols, function(col) x[[col]][1L])
      # Keyed join: one row per date in date_seq_dt, NA where x has no match.
      y <- x[date_seq_dt]
      if (length(fill_cols) > 0L) {
           y[is.na(Count), (fill_cols) := fill_values]
      }
      y[is.na(Count), Count := 0]
      y
 }
 # Run group_replace_func on every element of grouped_data_list in turn and
 # collect the results in a list of the same length and names.
 grouped_data_list_2 <- lapply(X = grouped_data_list, FUN = group_replace_func)
A new version that runs fast (following @Roland's advice):
## parallel work
     # Start a 4-worker cluster and register it as the %dopar% backend.
     cl <- makeCluster(4)
     registerDoParallel(cl)
     ## replace NA with zeros in the timeseries
     # foreach iterates over the list elements themselves (x = one group), so
     # the body never refers to the whole grouped_data_list, and the list of
     # per-group results returned by foreach() is what gets assigned.
     # tryCatch(..., finally = ) stops the cluster even if a task fails, so no
     # worker processes are left running.
     grouped_data_list_2 <- tryCatch(
          foreach(
               x = grouped_data_list
               , .packages = "data.table" # attach data.table on every worker
          ) %dopar% {
               # Keys the worker's copy of the group by Date; the list in the
               # calling session is not modified.
               data.table::setkey(x, Date)
               # Identifying columns are everything except date and count.
               fill_cols <- setdiff(colnames(x), c("Date", "Count"))
               # First-row value of each identifying column, taken per column
               # so its type is preserved (unlist() would coerce them all to
               # one common type).
               fill_values <- lapply(fill_cols, function(col) x[[col]][1L])
               # Keyed join: one row per reported date, NA where x has no match.
               y <- x[reported_date_seq_dt]
               if (length(fill_cols) > 0L) {
                    y[is.na(Count), (fill_cols) := fill_values]
               }
               y[is.na(Count), Count := 0]
               y
          }
          , finally = stopCluster(cl)
     )
