Setting missing = "em" tells mars to impute missing moderator values with a multivariate-normal expectation-maximization (EM) routine before the model is fit.

This option is available for:

  • Univariate models (structure = "univariate")
  • Multilevel models (structure = "multilevel")
  • Multivariate models fit through estimation(..., effectID = ...)

Univariate Example

library(mars)

# Simulate one effect size per study. The random draws below run in a fixed
# order (effect, var, age_mean) so that set.seed(1) reproduces the same data.
set.seed(1)
n_studies <- 40
dat_uni <- data.frame(
  study = seq_len(n_studies),
  effect = rnorm(n_studies, mean = 0.2, sd = 0.1),
  var = runif(n_studies, min = 0.01, max = 0.05),
  age_mean = rnorm(n_studies, mean = 50, sd = 8)
)

# Blank out the moderator in three rows so there is something to impute.
is.na(dat_uni$age_mean) <- c(3, 7, 20)

# Fit the univariate model, requesting EM imputation via missing = "em".
fit_uni_em <- mars(
  formula = effect ~ age_mean,
  data = dat_uni,
  studyID = "study",
  variance = "var",
  structure = "univariate",
  varcov_type = "univariate",
  estimation_method = "MLE",
  missing = "em"
)

# Inspect the missing-data summary stored on the fitted object.
fit_uni_em$missing_data
#> $method
#> [1] "em"
#> 
#> $em
#> $em$applied
#> [1] TRUE
#> 
#> $em$converged
#> [1] TRUE
#> 
#> $em$iterations
#> [1] 1
#> 
#> $em$columns
#> [1] "age_mean"
#> 
#> $em$model_columns
#> [1] "age_mean"

Multilevel Example

# Load the school example data.
data("school")

# Work on a copy and add year_mod: the year column with three entries
# deliberately set to missing so there is something to impute.
dat_ml <- school
dat_ml$year_mod <- replace(dat_ml$year, c(1, 5, 10), NA_real_)

# Fit the multilevel model, requesting EM imputation via missing = "em".
fit_ml_em <- mars(
  formula = effect ~ year_mod + (1 | district/study),
  data = dat_ml,
  studyID = "district",
  variance = "var",
  structure = "multilevel",
  varcov_type = "multilevel",
  estimation_method = "MLE",
  missing = "em"
)

# Inspect the missing-data summary stored on the fitted object.
fit_ml_em$missing_data
#> $method
#> [1] "em"
#> 
#> $em
#> $em$applied
#> [1] TRUE
#> 
#> $em$converged
#> [1] TRUE
#> 
#> $em$iterations
#> [1] 1
#> 
#> $em$columns
#> [1] "year_mod"
#> 
#> $em$model_columns
#> [1] "year_mod"

Multivariate Example

For multivariate fixed-effects moderator models, call estimation() directly, supplying an effectID column and a one-sided moderator formula in multivariate_covs.

# Simulate 8 studies with two comparisons each (16 rows). The random draws
# below run in a fixed order (yi, vi, n) so that set.seed(2) reproduces the
# same data.
set.seed(2)
n_studies <- 8
n_per_study <- 2
n_rows <- n_studies * n_per_study
dat_mv <- data.frame(
  study = rep(seq_len(n_studies), each = n_per_study),
  comparison_id = rep(seq_len(n_per_study), times = n_studies),
  yi = rnorm(n_rows, mean = 0.2, sd = 0.2),
  vi = runif(n_rows, min = 0.02, max = 0.06),
  n = sample(30:80, size = n_rows, replace = TRUE),
  followup_months = rep(
    seq(3, 24, length.out = n_studies),
    each = n_per_study
  )
)

# Blank out the moderator in three rows so there is something to impute.
is.na(dat_mv$followup_months) <- c(2, 9, 14)

# Fit the multivariate model through estimation(), requesting EM imputation
# via missing = "em".
fit_mv_em <- estimation(
  effect_name = "yi",
  data = dat_mv,
  studyID = "study",
  effectID = "comparison_id",
  variance = "vi",
  N = "n",
  varcov_type = "multilevel",
  structure = "DIAG1",
  multivariate_covs = ~ followup_months,
  estimation_method = "MLE",
  missing = "em"
)

# Inspect the missing-data summary stored on the fitted object.
fit_mv_em$missing_data
#> $method
#> [1] "em"
#> 
#> $em
#> $em$applied
#> [1] TRUE
#> 
#> $em$converged
#> [1] TRUE
#> 
#> $em$iterations
#> [1] 1
#> 
#> $em$columns
#> [1] "followup_months"
#> 
#> $em$model_columns
#> [1] "followup_months"

Notes

  • EM imputation targets moderator columns only.
  • Other missing values (for example, effect sizes or variances) are still handled by row removal after imputation.
  • EM imputation is currently supported only for numeric moderators.