1

I'm interested in using lists to run multiple statistical tests with one set of code.

For example, I want to run glm() tests that vary in terms of DVs, IVs, data, and family, based on rows in a data frame / list. I can do this the long way, and I can use lapply() to do this a "medium way" such that I can change the DV used in the test. But I would like to know if there is a method {preferably using lapply()} to complete this task with less code and in a more automated/iterative fashion.

For the example data, I created 2 datasets using the ggplot2::diamonds data and the code below:

### dataset built from the rows with the largest `table` values
# ---- NOTE: ranks the rows of `diamonds` by the `table` column, keeps the top
# ----       300, and converts the result to a plain data.frame
# ---- NOTE(review): dplyr::top_n() is documented to keep tied values, so this
# ----               may hold more than 300 rows -- confirm with nrow() below
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: quick inspection: first rows, structure, column names, row count
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: prints the distinct values of columns considered as outcomes (DVs)
# ----       or predictors (fixed / random effects)
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)

### dataset built from the rows with the smallest `table` values
# ---- NOTE: same call as above with n = -300, used here to take the bottom end
# ----       of the `table` ranking instead of the top
# ---- NOTE(review): ties presumably apply here too, so the row count may
# ----               exceed 300 -- confirm with nrow() below
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: quick inspection: first rows, structure, column names, row count
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: prints the distinct values of columns considered as outcomes (DVs)
# ----       or predictors (fixed / random effects)
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)

I then used these data to create a data frame with list information, and got these results:

## creates df with variable info
# One row per model to fit. Every column is kept as a character vector so the
# values can be passed straight to paste(), get() and glm(family = ...).
#   DV_name      - outcome column name
#   DV_label     - human-readable label for the outcome
#   dataset_name - name of the data frame object the model is fitted on
#   IV_name      - right-hand side of the model formula
#   family       - name of the glm() family function
# Built with data.frame() directly: the previous cbind() step first created a
# character matrix, which is unnecessary and, on R < 4.0, produced factor
# columns once wrapped in data.frame(). stringsAsFactors = FALSE pins the
# character type on every R version.
model_variable_df <- data.frame(
  DV_name = c("carat", "depth", "price"),
  DV_label = c("carat size", "depth size", "diamond price"),
  dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
  IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
  family = c("poisson", "poisson", "gaussian"),
  stringsAsFactors = FALSE
)

> model_variable_df
  DV_name      DV_label       dataset_name         IV_name   family
1   carat    carat size diamonds_bottom300   x + y + color  poisson
2   depth    depth size diamonds_bottom300 x + y + clarity  poisson
3   price diamond price    diamonds_top300   x + z + color gaussian

I can accomplish my task using the long method:

## long form of 3 models
# Each model is spelled out by hand; outcome, predictors, dataset and family
# correspond to one row of model_variable_df.

### model 1: carat ~ x + y + color, Poisson family, bottom-300 data
freq_glm_poisson_carat <-
  glm(carat ~ x + y + color, data = diamonds_bottom300, family = poisson())

### model 2: depth ~ x + y + clarity, Poisson family, bottom-300 data
# NOTE(review): the object name says "binomial" but the family is poisson()
freq_glm_binomial_depth <-
  glm(depth ~ x + y + clarity, data = diamonds_bottom300, family = poisson())

### model 3: price ~ x + z + color, Gaussian family, top-300 data
freq_glm_gaussian_price <-
  glm(price ~ x + z + color, data = diamonds_top300, family = gaussian())

I can also use the medium method for more specific and limited DV_name based tasks.

## lapply-based version: only the DV varies between models
# ---- NOTE: fits one glm() per entry of model_variable_df$DV_name
# ---- NOTE: predictors are fixed at x + y + color (same as model 1)
# ---- NOTE: every model uses data = diamonds_top300 and family = poisson()
# ---- NOTE: wrapr::let() swaps the placeholder names DV_col / dataset_obj for
# ----       the current DV name and the dataset name before glm() is evaluated
# ---- NOTE: the resulting list is named "<prefix>__<DV_name>"
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <- setNames(
  lapply(
    model_variable_df$DV_name,
    function(dv_name) {
      wrapr::let(
        c(DV_col = dv_name, dataset_obj = "diamonds_top300"),
        glm(DV_col ~ x + y + color, data = dataset_obj, family = poisson())
      )
    }
  ),
  paste(
    "freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter",
    model_variable_df$DV_name,
    sep = "__"
  )
)
# ---- NOTE: copies each list element into the global environment under its name
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: lists the objects whose names contain the prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")

Is there any method I could use to complete this task using (1) less code and (2) more iteration/automation? Any and all help is much appreciated.

FYI, I use RStudio on a 2013 Intel Macbook Pro.

Thanks.



Code used for practice:

# sets up data

## Loads packages
# require() only reports whether a package could be attached. The previous
# `if (!require(pkg)) install.packages(pkg)` pattern therefore installed a
# missing package but never attached it, so the first run on a fresh machine
# failed later (e.g. `diamonds` not found). Install when missing, then always
# attach with library(), which stops with an error if loading still fails.
# ---- NOTE: making plots and diamonds dataset
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# ---- NOTE: run mixed effects models
if (!requireNamespace("lme4", quietly = TRUE)) {
  install.packages("lme4")
}
library(lme4)
# ---- NOTE: for data wrangling
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# ---- NOTE: for iteration
if (!requireNamespace("wrapr", quietly = TRUE)) {
  install.packages("wrapr")
}
library(wrapr)

## dataset creation

### dataset built from the rows with the largest `table` values
# ---- NOTE: ranks the rows of `diamonds` by the `table` column, keeps the top
# ----       300, and converts the result to a plain data.frame
# ---- NOTE(review): dplyr::top_n() is documented to keep tied values, so this
# ----               may hold more than 300 rows -- confirm with nrow() below
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: quick inspection: first rows, structure, column names, row count
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: prints the distinct values of columns considered as outcomes (DVs)
# ----       or predictors (fixed / random effects)
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)

### dataset built from the rows with the smallest `table` values
# ---- NOTE: same call as above with n = -300, used here to take the bottom end
# ----       of the `table` ranking instead of the top
# ---- NOTE(review): ties presumably apply here too, so the row count may
# ----               exceed 300 -- confirm with nrow() below
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: quick inspection: first rows, structure, column names, row count
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: prints the distinct values of columns considered as outcomes (DVs)
# ----       or predictors (fixed / random effects)
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)

## creates df with variable info
# One row per model to fit. Every column is kept as a character vector so the
# values can be passed straight to paste(), get() and glm(family = ...).
#   DV_name      - outcome column name
#   DV_label     - human-readable label for the outcome
#   dataset_name - name of the data frame object the model is fitted on
#   IV_name      - right-hand side of the model formula
#   family       - name of the glm() family function
# Built with data.frame() directly: the previous cbind() step first created a
# character matrix, which is unnecessary and, on R < 4.0, produced factor
# columns once wrapped in data.frame(). stringsAsFactors = FALSE pins the
# character type on every R version.
model_variable_df <- data.frame(
  DV_name = c("carat", "depth", "price"),
  DV_label = c("carat size", "depth size", "diamond price"),
  dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
  IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
  family = c("poisson", "poisson", "gaussian"),
  stringsAsFactors = FALSE
)

## long form of 3 models
# Each model is spelled out by hand; outcome, predictors, dataset and family
# correspond to one row of model_variable_df.

### model 1: carat ~ x + y + color, Poisson family, bottom-300 data
freq_glm_poisson_carat <-
  glm(carat ~ x + y + color, data = diamonds_bottom300, family = poisson())

### model 2: depth ~ x + y + clarity, Poisson family, bottom-300 data
# NOTE(review): the object name says "binomial" but the family is poisson()
freq_glm_binomial_depth <-
  glm(depth ~ x + y + clarity, data = diamonds_bottom300, family = poisson())

### model 3: price ~ x + z + color, Gaussian family, top-300 data
freq_glm_gaussian_price <-
  glm(price ~ x + z + color, data = diamonds_top300, family = gaussian())

## lapply-based version: only the DV varies between models
# ---- NOTE: fits one glm() per entry of model_variable_df$DV_name
# ---- NOTE: predictors are fixed at x + y + color (same as model 1)
# ---- NOTE: every model uses data = diamonds_top300 and family = poisson()
# ---- NOTE: wrapr::let() swaps the placeholder names DV_col / dataset_obj for
# ----       the current DV name and the dataset name before glm() is evaluated
# ---- NOTE: the resulting list is named "<prefix>__<DV_name>"
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <- setNames(
  lapply(
    model_variable_df$DV_name,
    function(dv_name) {
      wrapr::let(
        c(DV_col = dv_name, dataset_obj = "diamonds_top300"),
        glm(DV_col ~ x + y + color, data = dataset_obj, family = poisson())
      )
    }
  ),
  paste(
    "freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter",
    model_variable_df$DV_name,
    sep = "__"
  )
)
# ---- NOTE: copies each list element into the global environment under its name
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: lists the objects whose names contain the prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")

1
  • or Map, they are all loops Map(function(x, y, d, fam) glm(reformulate(x, y), data = get(d), family = match.fun(fam)), x = model_variable_df$IV_name, y = model_variable_df$DV_name, d = model_variable_df$dataset_name, fam = model_variable_df$family)
    – rawr
    Commented Apr 28, 2021 at 5:17

1 Answer 1

0

You can do this with lapply as :

# Fit one glm() per row of model_variable_df and collect the fits in a list.
# For row i the formula is built as "<DV_name>~<IV_name>", the dataset is looked
# up by the name stored in dataset_name, and the family is passed by name.
# - seq_len() is used instead of seq(nrow(...)), which would iterate over
#   c(1, 0) if the specification table had zero rows.
# - as.character() guards against factor columns (the default for
#   data.frame() on R < 4.0), which get() and glm(family = ) do not accept.
# - the result is named by DV so models can be retrieved as model_list$carat.
model_list <- lapply(seq_len(nrow(model_variable_df)), function(i) {
  spec <- model_variable_df[i, ]
  model_formula <- as.formula(paste(spec$DV_name, spec$IV_name, sep = "~"))
  glm(
    model_formula,
    data = get(as.character(spec$dataset_name)),
    family = as.character(spec$family)
  )
})
names(model_list) <- as.character(model_variable_df$DV_name)

as.formula() converts the pasted string (e.g. "carat~x + y + color") into a formula, and get() looks up the dataset object from its name stored as a string.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.