I'm interested in using lists to run multiple statistical tests with one set of code.
For example, I want to run glm() tests that vary in terms of DVs, IVs, data, and family, based on the rows of a data frame / list. I can do this the long way, and I can use lapply() to do it a "medium way" in which only the DV changes between tests. But I would like to know if there is a method (preferably using lapply()) to complete this task with less code and in a more automated/iterative fashion.
For the example data, I created 2 datasets using the ggplot2::diamonds data and the code below:
### dataset built from the 300 largest `table` values
# ---- NOTE: top_n() ranks rows by the `table` column; it does not take the first 300 rows
# ---- NOTE: ties are kept, so nrow() below may report more than 300
diamonds_top300 <- data.frame(dplyr::top_n(x = diamonds, n = 300, wt = table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: prints the unique values of each DV / effect column, one column at a time
invisible(lapply(
  c("price", "y", "cut", "color", "carat", "clarity", "depth", "table"),
  function(col_name) print(unique(diamonds_top300[[col_name]]))
))
### dataset built from the 300 smallest `table` values
# ---- NOTE: a negative n makes top_n() rank from the bottom of `table`
diamonds_bottom300 <- data.frame(dplyr::top_n(x = diamonds, n = -300, wt = table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: prints the unique values of each DV / effect column, one column at a time
invisible(lapply(
  c("price", "y", "cut", "color", "carat", "clarity", "depth", "table"),
  function(col_name) print(unique(diamonds_bottom300[[col_name]]))
))
I then used these data to create a data frame with list information, and got these results:
## creates df with variable info
# ---- NOTE: one row per model; every column is a character string that is
# ---- NOTE: later turned into a formula part, dataset object, or family
# ---- NOTE: built directly with data.frame() (no cbind()) so columns are not
# ---- NOTE: forced through a character matrix first
model_variable_df <- data.frame(
  DV_name = c("carat", "depth", "price"),
  DV_label = c("carat size", "depth size", "diamond price"),
  dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
  IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
  family = c("poisson", "poisson", "gaussian"),
  # keep plain strings (not factors) on every R version
  stringsAsFactors = FALSE
)
> model_variable_df
DV_name DV_label dataset_name IV_name family
1 carat carat size diamonds_bottom300 x + y + color poisson
2 depth depth size diamonds_bottom300 x + y + clarity poisson
3 price diamond price diamonds_top300 x + z + color gaussian
I can accomplish my task using the long method:
## long form: three separate glm() calls, one per row of model_variable_df
### model 1: carat on x + y + color, poisson family, bottom-300 data
freq_glm_poisson_carat <- glm(
  formula = carat ~ x + y + color,
  data = diamonds_bottom300,
  family = poisson()
)
### model 2: depth on x + y + clarity, poisson family, bottom-300 data
# ---- NOTE: the object name says "binomial", but the family used is poisson()
freq_glm_binomial_depth <- glm(
  formula = depth ~ x + y + clarity,
  data = diamonds_bottom300,
  family = poisson()
)
### model 3: price on x + z + color, gaussian family, top-300 data
freq_glm_gaussian_price <- glm(
  formula = price ~ x + z + color,
  data = diamonds_top300,
  family = gaussian()
)
I can also use the medium method for more specific and limited DV_name-based tasks, where only the DV changes between models.
## lapply-based models in which only the DV varies
# ---- NOTE: one model is fit per entry of model_variable_df$DV_name
# ---- NOTE: IVs are fixed at x + y + color (the same terms as model 1)
# ---- NOTE: data is always diamonds_top300; family is always poisson()
# ---- NOTE: wrapr::let() swaps the DV_col / dataset_obj placeholders for the real names
# ---- NOTE: the list is built and named in one step (prefix + "__" + DV name)
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <- setNames(
  lapply(
    model_variable_df$DV_name,
    function(dv_name) {
      wrapr::let(
        c(DV_col = dv_name, dataset_obj = "diamonds_top300"),
        glm(DV_col ~ x + y + color, data = dataset_obj, family = poisson())
      )
    }
  ),
  paste(
    "freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter",
    model_variable_df$DV_name,
    sep = "__"
  )
)
# ---- NOTE: copies every list element into the global environment under its list name
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: lists the objects whose names contain the prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")
Is there any method I could use to complete this task using (1) less code and (2) more iteration/automation? Any and all help is much appreciated.
FYI, I use RStudio on a 2013 Intel Macbook Pro.
Thanks.
Code used for practice:
# sets up data
## Loads packages
# ---- NOTE: each package is installed only if missing, then ALWAYS attached;
# ---- NOTE: `if (!require(pkg)) install.packages(pkg)` leaves a freshly
# ---- NOTE: installed package unattached, so later code cannot find its objects
# ---- NOTE: making plots and diamonds dataset
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# ---- NOTE: run mixed effects models
if (!requireNamespace("lme4", quietly = TRUE)) {
  install.packages("lme4")
}
library(lme4)
# ---- NOTE: for data wrangling
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# ---- NOTE: for iteration
if (!requireNamespace("wrapr", quietly = TRUE)) {
  install.packages("wrapr")
}
library(wrapr)
## dataset creation
### dataset built from the 300 largest `table` values
# ---- NOTE: top_n() ranks rows by the `table` column; it does not take the first 300 rows
# ---- NOTE: ties are kept, so nrow() below may report more than 300
diamonds_top300 <- data.frame(dplyr::top_n(x = diamonds, n = 300, wt = table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: prints the unique values of each DV / effect column, one column at a time
invisible(lapply(
  c("price", "y", "cut", "color", "carat", "clarity", "depth", "table"),
  function(col_name) print(unique(diamonds_top300[[col_name]]))
))
### dataset built from the 300 smallest `table` values
# ---- NOTE: a negative n makes top_n() rank from the bottom of `table`
diamonds_bottom300 <- data.frame(dplyr::top_n(x = diamonds, n = -300, wt = table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: prints the unique values of each DV / effect column, one column at a time
invisible(lapply(
  c("price", "y", "cut", "color", "carat", "clarity", "depth", "table"),
  function(col_name) print(unique(diamonds_bottom300[[col_name]]))
))
## creates df with variable info
# ---- NOTE: one row per model; every column is a character string that is
# ---- NOTE: later turned into a formula part, dataset object, or family
# ---- NOTE: built directly with data.frame() (no cbind()) so columns are not
# ---- NOTE: forced through a character matrix first
model_variable_df <- data.frame(
  DV_name = c("carat", "depth", "price"),
  DV_label = c("carat size", "depth size", "diamond price"),
  dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
  IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
  family = c("poisson", "poisson", "gaussian"),
  # keep plain strings (not factors) on every R version
  stringsAsFactors = FALSE
)
## long form: three separate glm() calls, one per row of model_variable_df
### model 1: carat on x + y + color, poisson family, bottom-300 data
freq_glm_poisson_carat <- glm(
  formula = carat ~ x + y + color,
  data = diamonds_bottom300,
  family = poisson()
)
### model 2: depth on x + y + clarity, poisson family, bottom-300 data
# ---- NOTE: the object name says "binomial", but the family used is poisson()
freq_glm_binomial_depth <- glm(
  formula = depth ~ x + y + clarity,
  data = diamonds_bottom300,
  family = poisson()
)
### model 3: price on x + z + color, gaussian family, top-300 data
freq_glm_gaussian_price <- glm(
  formula = price ~ x + z + color,
  data = diamonds_top300,
  family = gaussian()
)
## lapply-based models in which only the DV varies
# ---- NOTE: one model is fit per entry of model_variable_df$DV_name
# ---- NOTE: IVs are fixed at x + y + color (the same terms as model 1)
# ---- NOTE: data is always diamonds_top300; family is always poisson()
# ---- NOTE: wrapr::let() swaps the DV_col / dataset_obj placeholders for the real names
# ---- NOTE: the list is built and named in one step (prefix + "__" + DV name)
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <- setNames(
  lapply(
    model_variable_df$DV_name,
    function(dv_name) {
      wrapr::let(
        c(DV_col = dv_name, dataset_obj = "diamonds_top300"),
        glm(DV_col ~ x + y + color, data = dataset_obj, family = poisson())
      )
    }
  ),
  paste(
    "freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter",
    model_variable_df$DV_name,
    sep = "__"
  )
)
# ---- NOTE: copies every list element into the global environment under its list name
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: lists the objects whose names contain the prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")
# ---- NOTE: answer: use Map(); lapply(), Map(), etc. are all loops
# ---- NOTE: Map() walks the four columns of model_variable_df in parallel,
# ---- NOTE: fitting one glm() per row:
# ---- NOTE:   x   = IV terms (string)   -> right-hand side via reformulate()
# ---- NOTE:   y   = DV name (string)    -> response via reformulate()
# ---- NOTE:   d   = dataset name        -> looked up with get()
# ---- NOTE:   fam = family name         -> resolved to a function with match.fun()
Map(
  function(x, y, d, fam) {
    glm(reformulate(x, y), data = get(d), family = match.fun(fam))
  },
  x = model_variable_df$IV_name,
  y = model_variable_df$DV_name,
  d = model_variable_df$dataset_name,
  fam = model_variable_df$family
)