I am running analysis on a large group of independent and dependent variables, so I have created a for loop to speed up the process of cycling through these. However, for some reason, when I try to run the shapiro.test(x) function within the for loop, I get the following error:
"Error in shapiro.test(as.numeric(dependent_vars_ttest[[y]])) : sample size must be between 3 and 5000"
But, when I run each variable on their own, they work just fine. Is this an issue with how I've constructed the loop? Here is my complete code:
setwd("~/Desktop/Stats")
data <- read.csv("Stats.csv", header = TRUE)
view(data)
independent_vars <- c(data$Smokes,data$Anx,data$Depression)  
dependent_vars_ttest <- c(data$ICU,data$Duration) 
dependent_vars_chi2 <- c(data$Complication,data$X90)  
wb <- createWorkbook()
for (x in 1:3){
  for(y in 1:2){
    for(z in 1:2){
  if (shapiro.test(as.numeric(dependent_vars_ttest[[y]]))$p.value >= 0.05 && 
var.test(as.numeric(dependent_vars_ttest[[y]]) ~ independent_vars[[x]], data = 
data)$p.value >= 0.05) {
    t_test_result <- t.test(as.numeric(dependent_vars_ttest[[y]]) ~ 
independent_vars[[x]], data = data)
  } else if (shapiro.test(as.numeric(dependent_vars_ttest[[y]]))$p.value >= 0.05) {
    t_test_result <- wilcox.test(as.numeric(dependent_vars_ttest[[y]]) ~ 
independent_vars[[x]], data = data)
  } else {
    t_test_result <- t.test(as.numeric(dependent_vars_ttest[[y]]) ~ 
independent_vars[[x]], data = data, var.equal = FALSE)
  }
  t_test_table <- data.frame(
    Variable = independent_vars[[x]],
    Dependent_Variable = dependent_vars_ttest[[y]],
    Mean_Group_1 = t_test_result$estimate[1],
    Mean_Group_2 = t_test_result$estimate[2],
    SD_Group_1 = t_test_result$sd[1],
    SD_Group_2 = t_test_result$sd[2],
    p_value = t_test_result$p.value,
    CI_Lower = t_test_result$conf.int[1],
    CI_Upper = t_test_result$conf.int[2]
  )
  addWorksheet(wb, sheetName = paste0(independent_vars[[x]], "_t_test"))
  writeData(wb, sheet = paste0(independent_vars[[x]], "_t_test"), x = t_test_table)
  contingency_table <- table(dependent_vars_chi2[[z]], independent_vars[[x]])
  chi2_test_result <- chisq.test(contingency_table)
  chi2_test_table <- data.frame(
    Variable = independent_vars[[x]],
    Dependent_Variable = dependent_vars_chi2[[z]],
    Group_Count = chi2_test_result$observed,
    Group_Count = chi2_test_result$observed,
    Odds_Ratio = chi2_test_result$estimate,
    Odds_Ratio_CI_Lower = chi2_test_result$conf.int[1],
    Odds_Ratio_CI_Upper = chi2_test_result$conf.int[2],
    p_value = chi2_test_result$p.value
    )
addWorksheet(wb, sheetName = paste0(independent_vars[[x]], "_chi2_test"))
writeData(wb, sheet = paste0(independent_vars[[x]], "_chi2_test"), x = 
chi2_test_table)
    }
  }
}
saveWorkbook(wb, "output.xlsx")
 
    