Within RStudio, under the Files pane (bottom right):
# One-time setup: make sure BiocManager is available, then use it to install
# the rhdf5 package.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("rhdf5")
DATA SOURCE: This lab exercise utilizes the NCES public-use dataset: Education Longitudinal Study of 2002 (Lauff & Ingels, 2014) \(\color{blue}{\text{See website: nces.ed.gov}}\)
library(apaTables)
library(reshape2)
library(MplusAutomation)
library(rhdf5)
library(tidyverse)
library(here)
library(kableExtra)
Learning goal: The goal of this lab activity is to locate areas of the MplusAutomation code which will change depending on the particular data & modeling context.
# Load the lab data set into `lab_data`.
# use data subset: "els_fa_ready_sub2.csv"
# Exercise: replace every *** below. The call has the shape of a `read_*()`
# function wrapped around a helper that takes two string arguments
# (presumably a folder name and a file name -- confirm against your project).
lab_data <- read_***(***("***", "***")) # What's missing?
https://cran.r-project.org/web/packages/apaTables/vignettes/apaTables.html
https://cran.r-project.org/web/packages/stargazer/vignettes/stargazer.pdf
# Correlation table (with means and SDs) for the first five columns of
# `lab_data`, written as table number 1 to a .doc file in the "figures" folder.
apa.cor.table(
  lab_data[, 1:5],
  filename = here("figures", "Table_cor_lab3_APA.doc"),
  table.number = 1
)
# Build the analysis subset `schl_safe` from `lab_data`.
# make a subset, keep all columns except (BYRACE & BYSTLANG)
schl_safe *** lab_data *** # What's missing?
***( )
# Reverse code the following columns 1-3, 5-7, 15-19 (by using column numbers)
# `cols` holds the positions of the columns to be reverse coded.
cols = c(***) # What's missing?
# Reverse coding requires taking the range + 1 ...
# i.e. each selected column is replaced by (constant - original value); the
# constant and the column selector are the blanks to fill in.
schl_safe[,***] <- *** - schl_safe[ ,***] # What's missing?
## efa (indicators: school climate, safety, clear rules)
# What's missing?
# Step 1: describe the model. The VARIABLE / ANALYSIS / PLOT / OUTPUT values
# are strings of Mplus syntax (lines starting with "!" inside them are Mplus
# comments, not R comments). MODEL is left as an empty string here.
# `usevariables` and `rdata` are the blanks that tie the model to the R data
# prepared above.
m_efa_1 <- mplusObject(
TITLE = "FACTOR ANALYSIS EFA - LAB 2 DEMO",
VARIABLE =
"! within mplus you can choose a number of sequential columns
! using (var1_name - var_last_name)
usevar = BYS20A-***;
*** = *** == ***;",
ANALYSIS =
"type = *** ***;
estimator = ***;
parallel=50; ! run the parallel analysis for viewing with the eigenvalue elbow
",
MODEL = "" ,
PLOT = "type = plot3;",
OUTPUT = "sampstat;",
usevariables = colnames(***),
rdata = ***)
# Step 2: hand the model object to mplusModeler(). `dataout` and `modelout`
# are the paths of the .dat and .inp files; the folder and file names are
# blanks to fill in. The returned object is kept in `m_efa_1_fit`.
m_efa_1_fit <- mplusModeler(***,
dataout=here("***", "***.dat"),
modelout=here("***", "***.inp"),
check=TRUE, run = TRUE, hashfilename = FALSE)
## END: EXPLORATORY FACTOR ANALYSIS
## efa reduced set - What's missing?
# Second EFA on a reduced item set: the `usevar` list below is to be filled in
# with the retained items; the "! remove:" Mplus comments record which items
# are dropped (BYS20C, BYS20D, BYS20H, BYS20I, BYS20L, BYS21B).
# NOTE(review): this assigns to `m_efa_1` again, overwriting the first model
# object, while the fit below is stored as `m_efa_2_fit` -- confirm whether
# `m_efa_2` was intended here.
m_efa_1 <- *** (
TITLE = "FACTOR ANALYSIS EFA - REDUCED SET - LAB 2 DEMO",
VARIABLE =
"usevar =
***
! remove: BYS20C BYS20D
***
! remove:BYS20H BYS20I BYS20L
***
! remove: BYS21B
;",
ANALYSIS =
" *** = *** ***
*** = ***
*** = ***
",
MODEL = "" ,
PLOT = "*** = ***;",
OUTPUT = "***;",
usevariables = colnames(***),
rdata = ***)
# Pass the reduced-set model object to mplusModeler(); `dataout` and
# `modelout` are the .dat and .inp file paths (folder and file names are
# blanks to fill in). The returned object is kept in `m_efa_2_fit`.
m_efa_2_fit <- mplusModeler(***,
dataout=here("***", "***.dat"),
modelout=here("***", "***.inp"),
check=TRUE, run = TRUE, hashfilename = FALSE)
## END: EXPLORATORY FACTOR ANALYSIS OF - REDUCED SET
# Factor-loading table template: one row per item, one column per factor.
# Replace each "***" with an item name and its loadings from the EFA output.
loading_table <- tribble(
  ~"Items", ~"Factor 1", ~"Factor 2", ~"Factor 3",
  #--------|------------|------------|------------|
  "***",    "***",       "***",       "***",
  "***",    "***",       "***",       "***",
  "***",    "***",       "***",       "***",
  "***",    "***",       "***",       "***",
  "***",    "***",       "***",       "***",
  "***",    "***",       "***",       "***"
)

# Render the table. `FALSE` is spelled out because `F` is an ordinary variable
# that can be reassigned, whereas `FALSE` is a reserved word.
loading_table %>%
  kable() %>%
  kable_styling(
    latex_options = c("striped"),
    full_width = FALSE,
    position = "left"
  )
Items | Factor 1 | Factor 2 | Factor 3 |
---|---|---|---|
*** | *** | *** | *** |
*** | *** | *** | *** |
*** | *** | *** | *** |
*** | *** | *** | *** |
*** | *** | *** | *** |
*** | *** | *** | *** |
# Eigenvalue "elbow" plot: EFA eigenvalues against the parallel-analysis
# averages, both taken from the Mplus results read in by readModels().
efa_summary <- readModels(here("efa_mplus", "lab3_efa2_female.out"))

# Pull the two eigenvalue series out of the nested results list.
x <- list(
  EFA = efa_summary[["gh5"]][["efa"]][["eigenvalues"]],
  Parallel = efa_summary[["gh5"]][["efa"]][["parallel_average"]]
)

# as_tibble() replaces the deprecated as_data_frame(). The factor labels are
# built with seq_len() so that an empty result yields zero rows instead of the
# c(1, 0) that 1:nrow() would produce. fct_inorder() keeps the labels in order
# of appearance so "10" is plotted after "9" rather than after "1".
plot_data <- as_tibble(x) %>%
  mutate(Factor = fct_inorder(as.character(seq_len(n())))) %>%
  select(Factor, everything())

# Long format: one row per (Factor, Analysis) pair.
plot_data_long <- plot_data %>%
  pivot_longer(
    EFA:Parallel,               # the columns being gathered together
    names_to = "Analysis",      # new column holding the old column names
    values_to = "Eigenvalues"   # new column holding the values
  )

plot_data_long %>%
  ggplot(aes(
    x = Factor,
    y = Eigenvalues,
    group = Analysis,
    color = Analysis
  )) +
  geom_point() +
  geom_line() +
  theme_minimal()

# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave(
  here("figures", "eigenvalue_elbow_rplot.png"),
  dpi = 300, height = 5, width = 7, units = "in"
)
Hallquist, M. N., & Wiley, J. F. (2018). MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus. Structural Equation Modeling: A Multidisciplinary Journal, 25(4), 621-638.
Horst, A. (2020). Course & Workshop Materials. GitHub Repositories, https://allisonhorst.github.io/
Muthén, L.K. and Muthén, B.O. (1998-2017). Mplus User’s Guide. Eighth Edition. Los Angeles, CA: Muthén & Muthén
R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL http://www.R-project.org/
Wickham et al. (2019). Welcome to the tidyverse. Journal of Open Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686