DATA SOURCE: This lab exercise utilizes the NCES public-use dataset: Education Longitudinal Study of 2002 (Lauff & Ingels, 2014) $$\color{blue}{\text{See website: nces.ed.gov}}$$

# load packages
library(MplusAutomation)
library(haven)
library(rhdf5)
library(tidyverse)
library(here)
library(corrplot)
library(kableExtra)
library(reshape2)
library(semPlot)

# 1 Lab 9 - Begin

lab_data <- read_csv(here("data", "els_sub5_data.csv"))

Preparations: subset,reorder, rename, and recode data

invar_data <-  lab_data %>%
select(bystlang, freelnch, byincome,                    # covariates
stolen, t_hurt, p_fight, hit, damaged, bullied,  # factor 1 (indicators)
safe, disrupt, gangs, rac_fght,                  # factor 2 (indicators)
late, skipped, mth_read, mth_test, rd_test) %>%
rename("unsafe" = "safe") %>%
mutate(
freelnch = case_when(    # Grade 10, percent free lunch - transform to binary
freelnch <  3 ~ 0,   # school has less than 11%
freelnch >= 3 ~ 1))  # school has greater than or equal to 11%

table(invar_data\$freelnch) # reasonably balanced groups

Take a quick look at variable distributions

melt(invar_data[,4:13]) %>%
ggplot(., aes(x=value, label=variable)) +
geom_histogram(bins = 15) +
facet_wrap(~variable, scales = "free")

Reverse code factor for ease of interpretation

cols = c("unsafe", "disrupt", "gangs", "rac_fght")

invar_data[ ,cols] <-  5 - invar_data[ ,cols]

Factor names and interpretation:

• VICTIM: student reports being a victim of injury to self or property
• scale range: Never, Once or twice, More than twice
• higher values indicate greater frequency of victimization events
• NEG_CLIM: Student reports on negative school climate attributes
• scale range: Strongly Disagree - Strongly Agree
• higher values indicate a more negative climate

Check correct coding, explore correlations

cor_matrix <- cor(invar_data[4:13], use = "pairwise.complete.obs")

corrplot(cor_matrix,
method = "circle",
type = "upper")

# 2 Estimate the Unconditional Confirmatory Factor Analysis (CFA) model

Number of parameters = 31

• 10 intercepts
• 10 residual variances
• 01 factor co-variances
cfa_m0  <- mplusObject(
TITLE = "model0 - unconditional CFA model",
VARIABLE =
"usevar = stolen-rac_fght;",

ANALYSIS =
"estimator = mlr;",

MODEL =
"VICTIM by stolen* t_hurt p_fight hit damaged bullied;
VICTIM@1; ! UVI identification

NEG_CLIM by unsafe* disrupt gangs rac_fght;
NEG_CLIM@1; ",

PLOT = "type = plot3;",
OUTPUT = "sampstat standardized residual modindices (3.84);",

usevariables = colnames(invar_data),
rdata = invar_data)

cfa_m0_fit <- mplusModeler(cfa_m0,
dataout=here("invar_mplus", "lab9_invar_data.dat"),
modelout=here("invar_mplus", "M0_CFA_fullsample.inp"),
check=TRUE, run = TRUE, hashfilename = FALSE)

# 3 Run separate CFA models for each sub-sample

## 3.1 Group freelnch = 0 (low) CFA

cfa_m1  <- mplusObject(
VARIABLE =
"usevar = stolen-rac_fght;

!freelnch (0 = school proportion is less than 11 percent)
USEOBS = freelnch == 0; ",

ANALYSIS =
"estimator = mlr;",

MODEL =
"VICTIM by stolen* t_hurt p_fight hit damaged bullied;
VICTIM@1; ! UVI identification

NEG_CLIM by unsafe* disrupt gangs rac_fght;
NEG_CLIM@1; ",

PLOT = "type = plot3;",
OUTPUT = "sampstat standardized residual modindices (3.84);",

usevariables = colnames(invar_data),
rdata = invar_data)

cfa_m1_fit <- mplusModeler(cfa_m1,
dataout=here("invar_mplus", "lab9_invar_data.dat"),
modelout=here("invar_mplus", "M1_CFA_freelnch_0.inp"),
check=TRUE, run = TRUE, hashfilename = FALSE)

## 3.2 Group freelnch = 1 (moderate to high) CFA

cfa_m2  <- mplusObject(
VARIABLE =
"usevar = stolen-rac_fght;

!freelnch (1 = school proportion is greater than or equal to 11 percent)
USEOBS = freelnch == 1; ",

ANALYSIS =
"estimator = mlr;",

MODEL =
"VICTIM by stolen* t_hurt p_fight hit damaged bullied;
VICTIM@1; ! UVI identification

NEG_CLIM by unsafe* disrupt gangs rac_fght;
NEG_CLIM@1; ",

PLOT = "type = plot3;",
OUTPUT = "sampstat standardized residual modindices (3.84);",

usevariables = colnames(invar_data),
rdata = invar_data)

cfa_m2_fit <- mplusModeler(cfa_m2,
dataout=here("invar_mplus", "lab9_invar_data.dat"),
modelout=here("invar_mplus", "M2_CFA_freelnch_1.inp"),
check=TRUE, run = TRUE, hashfilename = FALSE)