I'm taking in a large data frame and want to perform analyses on subsets of the original data (mean, st deviation, etc). Right now I have code for making new data frames with the columns of interest as shown :
df1 <- data_clean %>%
  filter(sex=="Male" & experiment_group == "Saline")  %>%
  mutate(avg_presses = rowMeans(select(., c("total1", "total2", "total3")), na.rm=TRUE))
cohort_avg <- c()         #initialize cohort avgs empty vector
cohort_std <- c()
for (cohort_num in 1:max(df1$cohort)) {     # loop through all cohorts 
  data_build <- CI_fem_coca                         # make temp dataframe initialized w original 
  for (i in 1:nrow(df1)) {                  #loop through all rows
    if (df1$cohort[i] == cohort_num) {      #if row cohort number is equal to the cohort # to group 
      data_build <- df1 %>%
        filter(cohort==cohort_num) %>%
        mutate(avgs = mean(avg_presses, na.rm=TRUE),    #add new column avgs that is the cohort avg 
               std = sd(avg_presses, na.rm=TRUE))   
    }
  }
  cohort_avg<- c(cohort_avg, data_build$avgs)     #add cohort group avg to vector 
  cohort_std <- c(cohort_std, data_build$std)
}
df1 <- df1 %>%                    #add cohort avgs to original dataframe 
  add_column(cohort_avgs = cohort_avg, cohort_sd=cohort_std ) 
df1 <- df1 %>%
  mutate(z_score = (avg_presses - cohort_avgs)/cohort_sd)
This works fine but I have four data frames to perform this exact analysis on, and writing it 4 times seems clunky. Is there a way to add the four data frames to a list and then iterate over each data frame? I tried :
CI_list <- list(df1, df2, df3, df4)
for (i in 1:length(CI_list)) {
  cohort_avg <- c()         
  cohort_std <- c()
  for (cohort_num in 1:max(CI_list[[i]]$cohort)) {     
    data_build <- CI_list[[i]]                        
    for (i in 1:nrow(CI_list[[i]])) {                 
      if (CI_list[[i]]$cohort[i] == cohort_num) {      
        data_build <- CI_list[[i]] %>%
          filter(cohort==cohort_num) %>%
          mutate(avgs = mean(avg_presses, na.rm=TRUE),    
                 std = sd(avg_presses, na.rm=TRUE))   
      }
    }
    cohort_avg<- c(cohort_avg, data_build$avgs)     
    cohort_std <- c(cohort_std, data_build$std)
  }
  CI_list[[i]] <- CI_list[[i]] %>%                    
    add_column(cohort_avgs = cohort_avg, cohort_sd=cohort_std ) 
  
  CI_list[[i]] <- CI_list[[i]] %>%
    mutate(z_score = (avg_Infusions - cohort_avgs)/cohort_sd)
  
  
  
}
but i get a subscript out of bounds error. is there a better way to do this?
Edit: in trying to use a function and lapply I did:
find_CI_zscore <- function(df) {
  for (i in 1:length(df)) {
    cohort_avg <- c()         #initialize cohort avgs empty vector
    cohort_std <- c()
    for (cohort_num in 1:max(df$cohort)) {     # loop through all cohorts 
      data_build <- df                         # make temp dataframe initialized w original data
      for (i in 1:nrow(df)) {                  #loop through all rows
        if (df$cohort[i] == cohort_num) {      #if row cohort number is equal to the cohort # to group together
          data_build <- df %>%
            filter(cohort==cohort_num) %>%
            mutate(avgs = mean(avg_Infusions, na.rm=TRUE),    #add new column avgs that is the cohort avg infusion #
                   std = sd(avg_Infusions, na.rm=TRUE))   
        }
      }
      cohort_avg<- c(cohort_avg, data_build$avgs)     #add cohort group avg to vector 
      cohort_std <- c(cohort_std, data_build$std)
    }
    df <- df %>%                    #add cohort avgs to original dataframe 
      add_column(cohort_avgs = cohort_avg, cohort_sd=cohort_std ) 
    
    CI_list <- CI_list %>%
      mutate(z_score = (avg_Infusions - cohort_avgs)/cohort_sd)
  }
}
for (i in 1:length(CI_list)) {
  lapply(CI_list[[i]], find_CI_zscore)
And I get this error: Error in df$cohort : $ operator is invalid for atomic vectors How do I use lapply to take in data frames not vectors?
New edit: this is the dput let me know if that worked:
> dput(list(df1[1:7, ], df2[1:7, ]))
list(structure(list(cohort = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), avg_Infusions = c(31.3333333333333, 
32.6666666666667, 4, 20, 7, 22.6666666666667, 11.3333333333333
)), row.names = c(NA, 7L), class = "data.frame"), structure(list(
    cohort = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), avg_Infusions = c(6.66666666666667, 
    17.6666666666667, 17.3333333333333, 0.333333333333333, 10, 
    8.66666666666667, 20)), row.names = c(NA, 7L), class = "data.frame"))
 
    