Within R-studio under the files pane (bottom right):
install.packages("semPlot")
install.packages("stargazer")
# Install previously used pacakages (only if your on a new computer!)
install.packages(c("semPlot",
                   "stargazer",
                   "MplusAutomation",
                   "tidyverse",
                   "here"))DATA SOURCE: This lab exercise utilizes the NCES public-use dataset: Education Longitudinal Study of 2002 (Lauff & Ingels, 2014) \(\color{blue}{\text{See website: nces.ed.gov}}\)
loading packages…
library(tidyverse)
library(MplusAutomation)
library(rhdf5)
library(here)
library(semPlot)
library(stargazer)read in data
trouble_data <- read_csv(here("data", "els_sub3_school_trouble.csv"))take a look at the EFA data (same indicators used for lab 4)
stargazer(as.data.frame(trouble_data), type="text", digits=1)replicate the SAME random split as last week (do not change the seed)
floor() helps with roundingsmp_size <- floor(0.50 * nrow(trouble_data))set the seed to make partition reproducible
set.seed(123)sample() randomly selects rows of size = “smp_size”
calibrate_smp <- sample(seq_len(nrow(trouble_data)), size = smp_size)create two samples called “calibrate” & “validate”
calibrate <- trouble_data[calibrate_smp, ]
validate <- trouble_data[-calibrate_smp, ]reorder variables & remove column number 15
# removed BYS24G (due to low loading < .2)
calibrate <-  calibrate %>% 
  select(1,3:14,2) 
# do the same for the validate sample
validate <-  validate %>% 
  select(1,3:14,2) save the calibrate & validate samples using “write_csv”
# save calibrate sample
write_csv(calibrate, here("data", "calibrate_trouble_sample.csv"))
# save validate sample
write_csv(validate, here("data", "validate_trouble_sample.csv"))efa_geomin  <- mplusObject(
  TITLE = "Geomin Oblique EFA - LAB 5 DEMO", 
  VARIABLE = 
    " ! removed BYS24G (due to low loading < .2)
    usevar = BYS22A-BYS22B;", 
  
  ANALYSIS = 
    "type = efa 1 3;   
     estimator = mlr;
     rotation = geomin; ! this is the default (added to be explicit)
     parallel=50; ! run parallel analysis",
  
  MODEL = "" ,
  
  PLOT = "type = plot3;",
  OUTPUT = "sampstat;",
  
  usevariables = colnames(calibrate), 
  rdata = calibrate)
efa_geomin_fit <- mplusModeler(efa_geomin, 
                            dataout=here("efa_mplus", "lab5_efa_geomin_oblique.dat"),
                            modelout=here("efa_mplus", "lab5_efa_geomin_oblique.inp"),
                            check=TRUE, run = TRUE, hashfilename = FALSE)efa_varimax  <- mplusObject(
  TITLE = "Varimax Orthogonal EFA - LAB 5 DEMO", 
  VARIABLE = 
    " ! removed BYS24G (due to low loading < .2)
    usevar = BYS22A-BYS22B;", 
  
  ANALYSIS = 
    "type = efa 1 3;   
     estimator = mlr;
     rotation = varimax; ! orthogonal (no factor correlations)
     parallel=50; ! run parallel analysis",
  
  MODEL = "" ,
  
  PLOT = "type = plot3;",
  OUTPUT = "sampstat;",
  
  usevariables = colnames(calibrate), 
  rdata = calibrate)
efa_varimax_fit <- mplusModeler(efa_varimax, 
                            dataout=here("efa_mplus", "lab5_efa_varimax_orthogonal.dat"),
                            modelout=here("efa_mplus", "lab5_efa_varimax_orthogonal.inp"),
                            check=TRUE, run = TRUE, hashfilename = FALSE)cfa_validate  <- mplusObject(
  TITLE = "Geomin Oblique EFA - LAB 5 DEMO", 
  VARIABLE = 
    "usevar = BYS22A-BYS22B;", 
  
  ANALYSIS = 
    "estimator = mlr;",
  
  MODEL = 
    "FACTOR_1 by BYS22A BYS22C BYS22D BYS22E BYS22F BYS22G BYS22H;
  
     FACTOR_2 BY BYS24A BYS24B BYS24C BYS24D BYS24E BYS24F BYS22B;" ,
  
  PLOT = "type = plot3;",
  OUTPUT = "sampstat standardized residual modindices (3.84);",
  
  usevariables = colnames(validate), 
  rdata = validate)
cfa_val_fit <- mplusModeler(cfa_validate, 
                            dataout=here("cfa_mplus", "lab5_cfa_validate.dat"),
                            modelout=here("cfa_mplus", "lab5_cfa_validate.inp"),
                            check=TRUE, run = TRUE, hashfilename = FALSE)# Read in the model to R within the "cfa_mplus" folder
cfa_output <- readModels(here("cfa_mplus"))
# Plot model:
semPaths(cfa_output,
         intercepts=FALSE,
         fixedStyle = c(1))play with some formatting
# Plot model:
semPaths(cfa_output, "std",
         intercepts=FALSE,
         fixedStyle = c(1),
         fade = FALSE,
         color= list(lat = c("light blue"," light green")),
        )Hallquist, M. N., & Wiley, J. F. (2018). MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus. Structural equation modeling: a multidisciplinary journal, 25(4), 621-638.
Horst, A. (2020). Course & Workshop Materials. GitHub Repositories, https://https://allisonhorst.github.io/
Muthén, L.K. and Muthén, B.O. (1998-2017). Mplus User’s Guide. Eighth Edition. Los Angeles, CA: Muthén & Muthén
R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL http://www.R-project.org/
Wickham et al., (2019). Welcome to the tidyverse. Journal of Open Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686