EM Imputation for Missing Moderator Data • mars

mars supports missing = "em" to impute missing moderator values with a multivariate-normal EM routine before fitting the model.

This option is available for:

Univariate models (structure = "univariate")
Multilevel models (structure = "multilevel")
Multivariate models fit through estimation(..., effectID = ...)

Univariate Example

library(mars)

# Simulate one effect size per study for 40 studies. The random draws are
# made in a fixed order (effect, var, age_mean) so the seed reproduces the
# same data every time.
set.seed(1)
dat_uni <- local({
  n_studies <- 40
  effect_draws <- rnorm(n_studies, mean = 0.2, sd = 0.1)
  var_draws <- runif(n_studies, min = 0.01, max = 0.05)
  age_draws <- rnorm(n_studies, mean = 50, sd = 8)
  # Blank out three moderator values so there is something to impute.
  age_draws[c(3, 7, 20)] <- NA_real_
  data.frame(
    study = seq_len(n_studies),
    effect = effect_draws,
    var = var_draws,
    age_mean = age_draws
  )
})

# Fit a univariate model regressing `effect` on the `age_mean` moderator.
# `missing = "em"` requests EM imputation of the missing moderator values
# before the model is fit.
fit_uni_em <- mars(
  data = dat_uni,
  formula = effect ~ age_mean,
  studyID = "study",
  variance = "var",
  varcov_type = "univariate",
  structure = "univariate",
  estimation_method = "MLE",
  missing = "em"
)

# Inspect the missing-data record on the fit: the method used and the EM
# details (applied, converged, iterations, imputed columns).
fit_uni_em$missing_data
#> $method
#> [1] "em"
#> 
#> $em
#> $em$applied
#> [1] TRUE
#> 
#> $em$converged
#> [1] TRUE
#> 
#> $em$iterations
#> [1] 1
#> 
#> $em$columns
#> [1] "age_mean"
#> 
#> $em$model_columns
#> [1] "age_mean"

Multilevel Example

# Load the `school` example data and work on a copy.
data(school)

dat_ml <- school
# Add `year_mod`, a copy of `year` with rows 1, 5 and 10 set to missing,
# to serve as the moderator that EM will impute.
dat_ml[["year_mod"]] <- replace(dat_ml[["year"]], c(1, 5, 10), NA_real_)

# Fit a multilevel model with `year_mod` as the moderator and a nested
# random-effects term, (1 | district/study). `missing = "em"` requests EM
# imputation of the missing moderator values before the model is fit.
fit_ml_em <- mars(
  data = dat_ml,
  formula = effect ~ year_mod + (1 | district/study),
  studyID = "district",
  variance = "var",
  varcov_type = "multilevel",
  structure = "multilevel",
  estimation_method = "MLE",
  missing = "em"
)

# Inspect the missing-data record on the fit: the method used and the EM
# details (applied, converged, iterations, imputed columns).
fit_ml_em$missing_data
#> $method
#> [1] "em"
#> 
#> $em
#> $em$applied
#> [1] TRUE
#> 
#> $em$converged
#> [1] TRUE
#> 
#> $em$iterations
#> [1] 1
#> 
#> $em$columns
#> [1] "year_mod"
#> 
#> $em$model_columns
#> [1] "year_mod"

Multivariate Example

For multivariate fixed-effects moderator models, use estimation() with an effectID and multivariate_covs.

# Simulate 8 studies with two comparisons each (16 rows). The random draws
# are made in a fixed order (yi, vi, n) so the seed reproduces the same data.
set.seed(2)
dat_mv <- local({
  yi_draws <- rnorm(16, mean = 0.2, sd = 0.2)
  vi_draws <- runif(16, min = 0.02, max = 0.06)
  n_draws <- sample(30:80, size = 16, replace = TRUE)
  # One follow-up length per study, repeated for both of its comparisons.
  followup <- rep(seq(3, 24, length.out = 8), each = 2)
  # Blank out three moderator values so there is something to impute.
  followup[c(2, 9, 14)] <- NA_real_
  data.frame(
    study = rep(seq_len(8), each = 2),
    comparison_id = rep(1:2, times = 8),
    yi = yi_draws,
    vi = vi_draws,
    n = n_draws,
    followup_months = followup
  )
})

# Fit the multivariate model through estimation(), identifying effects
# within a study via `effectID` and supplying the moderator through
# `multivariate_covs`. `missing = "em"` requests EM imputation of the
# missing moderator values before the model is fit.
fit_mv_em <- estimation(
  effect_name = "yi",
  data = dat_mv,
  studyID = "study",
  effectID = "comparison_id",
  variance = "vi",
  varcov_type = "multilevel",
  structure = "DIAG1",
  multivariate_covs = ~ followup_months,
  estimation_method = "MLE",
  N = "n",
  missing = "em"
)

# Inspect the missing-data record on the fit: the method used and the EM
# details (applied, converged, iterations, imputed columns).
fit_mv_em$missing_data
#> $method
#> [1] "em"
#> 
#> $em
#> $em$applied
#> [1] TRUE
#> 
#> $em$converged
#> [1] TRUE
#> 
#> $em$iterations
#> [1] 1
#> 
#> $em$columns
#> [1] "followup_months"
#> 
#> $em$model_columns
#> [1] "followup_months"

Notes

EM imputation targets moderator columns only.
Other missing values (for example, effect sizes or variances) are still handled by row removal after imputation.
EM imputation is currently supported only for numeric moderators.