DATA SOURCE: This lab exercise utilizes the NCES public-use dataset: Education Longitudinal Study of 2002 (Lauff & Ingels, 2014) \(\color{blue}{\text{See website: nces.ed.gov}}\)

# load packages
library(MplusAutomation)
library(haven)
library(rhdf5)
library(tidyverse)
library(here)
library(corrplot)
library(kableExtra)
library(reshape2)
library(semPlot)

1 Lab 9 - Begin


Read in data

lab_data <- read_csv(here("data", "els_sub5_data.csv"))

Preparations: subset,reorder, rename, and recode data

invar_data <-  lab_data %>% 
  select(bystlang, freelnch, byincome,                    # covariates 
         stolen, t_hurt, p_fight, hit, damaged, bullied,  # factor 1 (indicators)
         safe, disrupt, gangs, rac_fght,                  # factor 2 (indicators)
         late, skipped, mth_read, mth_test, rd_test) %>%  
  rename("unsafe" = "safe") %>% 
  mutate(
    freelnch = case_when(    # Grade 10, percent free lunch - transform to binary
        freelnch <  3 ~ 0,   # school has less than 11% 
        freelnch >= 3 ~ 1))  # school has greater than or equal to 11% 

table(invar_data$freelnch) # reasonably balanced groups

Take a quick look at variable distributions

melt(invar_data[,4:13]) %>% 
  ggplot(., aes(x=value, label=variable)) +
  geom_histogram(bins = 15) +
  facet_wrap(~variable, scales = "free")

Reverse code factor for ease of interpretation

cols = c("unsafe", "disrupt", "gangs", "rac_fght")

invar_data[ ,cols] <-  5 - invar_data[ ,cols]

Factor names and interpretation:

Check correct coding, explore correlations

cor_matrix <- cor(invar_data[4:13], use = "pairwise.complete.obs")

corrplot(cor_matrix, 
         method = "circle",
         type = "upper")

2 Estimate the Unconditional Confirmatory Factor Analysis (CFA) model


Number of parameters = 31

cfa_m0  <- mplusObject(
  TITLE = "model0 - unconditional CFA model", 
  VARIABLE = 
    "usevar = stolen-rac_fght;", 
  
  ANALYSIS = 
    "estimator = mlr;",
  
  MODEL = 
    "VICTIM by stolen* t_hurt p_fight hit damaged bullied;
     VICTIM@1; ! UVI identification
     
     NEG_CLIM by unsafe* disrupt gangs rac_fght;
     NEG_CLIM@1; ",
  
  PLOT = "type = plot3;",
  OUTPUT = "sampstat standardized residual modindices (3.84);",
  
  usevariables = colnames(invar_data), 
  rdata = invar_data)

cfa_m0_fit <- mplusModeler(cfa_m0, 
                            dataout=here("invar_mplus", "lab9_invar_data.dat"),
                            modelout=here("invar_mplus", "M0_CFA_fullsample.inp"),
                            check=TRUE, run = TRUE, hashfilename = FALSE)

3 Run separate CFA models for each sub-sample


3.1 Group freelnch = 0 (low) CFA

cfa_m1  <- mplusObject(
  TITLE = "CFA model1 - group is 0 for freelnch", 
  VARIABLE = 
    "usevar = stolen-rac_fght;
     
     !freelnch (0 = school proportion is less than 11 percent)
     USEOBS = freelnch == 0; ", 
  
  ANALYSIS = 
    "estimator = mlr;",
  
  MODEL = 
    "VICTIM by stolen* t_hurt p_fight hit damaged bullied;
     VICTIM@1; ! UVI identification
     
     NEG_CLIM by unsafe* disrupt gangs rac_fght;
     NEG_CLIM@1; ",
  
  PLOT = "type = plot3;",
  OUTPUT = "sampstat standardized residual modindices (3.84);",
  
  usevariables = colnames(invar_data), 
  rdata = invar_data)

cfa_m1_fit <- mplusModeler(cfa_m1, 
                            dataout=here("invar_mplus", "lab9_invar_data.dat"),
                            modelout=here("invar_mplus", "M1_CFA_freelnch_0.inp"),
                            check=TRUE, run = TRUE, hashfilename = FALSE)

3.2 Group freelnch = 1 (moderate to high) CFA

cfa_m2  <- mplusObject(
  TITLE = "CFA model2 - group is 1 for freelnch", 
  VARIABLE = 
    "usevar = stolen-rac_fght;
     
     !freelnch (1 = school proportion is greater than or equal to 11 percent)
     USEOBS = freelnch == 1; ", 
  
  ANALYSIS = 
    "estimator = mlr;",
  
  MODEL = 
    "VICTIM by stolen* t_hurt p_fight hit damaged bullied;
     VICTIM@1; ! UVI identification
     
     NEG_CLIM by unsafe* disrupt gangs rac_fght;
     NEG_CLIM@1; ",
     
  PLOT = "type = plot3;",
  OUTPUT = "sampstat standardized residual modindices (3.84);",
  
  usevariables = colnames(invar_data), 
  rdata = invar_data)

cfa_m2_fit <- mplusModeler(cfa_m2, 
                            dataout=here("invar_mplus", "lab9_invar_data.dat"),
                            modelout=here("invar_mplus", "M2_CFA_freelnch_1.inp"),
                            check=TRUE, run = TRUE, hashfilename = FALSE)

4 ~~~~~~~~~ Multi-Group Invariance Models ~~~~~~~~~