University of California, Santa Barbara
Lab preparation
Creating a version-controlled R-Project with Github
Download repository here: https://github.com/garberadamc/SEM-Lab6
On the Github repository webpage:
fork
your own branch
of the lab repository- copy the repository web URL address from the
clone or download
menu
Within R-Studio:
- click “NEW PROJECT”
- choose option
Version Control
- choose option
Git
- paste the repository web URL path copied from the
clone or download
menu on Github page - choose location of the R-Project (too many nested folders will result in filepath error)
Data sources:
The first 3 models utilize a public-use data subset of the Longitudinal Survey of American Youth (LSAY) \(\color{blue}{\text{See documentation here}}\)
The 4th model utilizes a public-use data subset of the High School Longitudinal Study (HSLS) \(\color{blue}{\text{See documentation here}}\)
Load packages
library(gganimate)
library(hrbrthemes)
library(tidyverse)
library(haven)
library(janitor)
library(MplusAutomation)
library(rhdf5)
library(here)
library(kableExtra)
library(gtsummary)
library(semPlot)
LSAY
data example - Math Scores
across 6 timepoints
Read in data
# Import the LSAY SPSS file, narrow it to the columns used in this lab,
# lower-case the names with clean_names(), and give the six imputed math
# scores grade-based names (math_07 ... math_12).
lsay_data <- read_spss(here("data", "LSAY_Lab6.sav")) %>%
  select(
    -starts_with("AB"),
    ends_with("IMP"),
    -contains("BIO"),
    -contains("PHY"),
    -contains("SCI"),
    FATHED,
    MOTHED
  ) %>%
  clean_names() %>%
  rename(
    math_07 = amthimp,
    math_08 = cmthimp,
    math_09 = emthimp,
    math_10 = gmthimp,
    math_11 = imthimp,
    math_12 = kmthimp
  )

# Recode every cell equal to 9999 as missing.
lsay_data[lsay_data == 9999] <- NA
View metadata
Write a CSV
file
Read in the CSV
file (SPSS labels removed)
Table. LSAY repeated measures
Name | Labels |
---|---|
math_07 | 7th grade math score (imputed) |
math_08 | 8th grade math score (imputed) |
math_09 | 9th grade math score (imputed) |
math_10 | 10th grade math score (imputed) |
math_11 | 11th grade math score (imputed) |
math_12 | 12th grade math score (imputed) |
Model 1 - Latent growth model with fixed time effects
(equal intervals)
# Model 1: latent growth model for the six math scores with time scores
# fixed at 0-5 (equal intervals between grades 7 and 12).
# Every Mplus statement below is terminated with ";" -- the ANALYSIS and PLOT
# sections previously ended without one.
m1_growth <- mplusObject(
  TITLE = "m1 growth model fixed time scores - Lab 6",
  VARIABLE = "usevar =\nmath_07-math_12; ",
  ANALYSIS = "estimator = ML;",
  # `i` and `s` are the intercept and slope growth factors.
  MODEL = "i s | math_07@0 math_08@1 math_09@2 math_10@3 math_11@4 math_12@5; ",
  OUTPUT = "sampstat standardized;",
  PLOT = "type=plot3;\nseries = math_07-math_12(*);",
  usevariables = colnames(lsay_lab6),
  rdata = lsay_lab6
)

# Write the data and input files under mplus_files/ and run the model.
# hashfilename = FALSE keeps the data file name exactly as given in `dataout`.
m1_growth_fit <- mplusModeler(
  m1_growth,
  dataout = here("mplus_files", "Lab6.dat"),
  modelout = here("mplus_files", "m1_growth_Lab6.inp"),
  check = TRUE,
  run = TRUE,
  hashfilename = FALSE
)
Load in the mplus.R
functions
## [1] "Loaded rhdf5 package"
Plotting using gh5
plot data generated by Mplus
- View plots available for a given model
- Generate plots using the
get.plot.___
function - Extract data and transform to tidy format
- Plot with
ggplot
Prepare plot data
# Observed math scores. The row number is stored in `rowname` before
# drop_na() so each retained case keeps its original identifier.
observed <- lsay_lab6 %>%
  select(starts_with("math")) %>%
  rownames_to_column() %>%
  drop_na()

# Up to 100 complete cases for plotting. head() returns at most 100 rows,
# whereas indexing with 1:100 requests rows that may not exist.
obs100 <- head(observed, 100)

# Long format: one row per case and grade.
plot_obs <- obs100 %>%
  pivot_longer(
    math_07:math_12,      # columns gathered together
    names_to = "grade",   # new column holding the former column names
    values_to = "value"   # new column holding the scores
  )

# Names of the six score columns (column 1 is `rowname`).
gradelevels <- colnames(observed[, 2:7])

# Model-estimated means read from the Mplus .gh5 plot file, labelled by grade.
# as.data.frame() names the single column of means `V1`.
mean_est <- here("mplus_files", "m1_growth_Lab6.gh5") %>%
  mplus.get.estimated_means() %>%
  as.data.frame() %>%
  mutate(grade = gradelevels)
Plot the model estimated means superimposed on the observed individual values
# Spaghetti plot of the observed individual trajectories (grey, translucent)
# with the model 1 estimated means drawn on top in blue.
growth_plot <- ggplot() +
  # Observed values: one point/line series per case (`rowname`).
  geom_point(
    data = plot_obs,
    mapping = aes(x = grade, y = value, group = rowname),
    alpha = .3
  ) +
  geom_line(
    data = plot_obs,
    mapping = aes(x = grade, y = value, group = rowname),
    alpha = .3
  ) +
  # Estimated means: group = 1 joins the six means into a single line.
  geom_point(
    data = mean_est,
    mapping = aes(x = grade, y = V1),
    color = "Blue",
    size = 1.5
  ) +
  geom_line(
    data = mean_est,
    mapping = aes(x = grade, y = V1, group = 1),
    color = "Blue",
    size = 1.2
  ) +
  # Show grade numbers instead of the variable names on the x axis.
  scale_x_discrete(labels = c("7", "8", "9", "10", "11", "12")) +
  labs(x = "Grade", y = "Math Score") +
  theme_minimal()

growth_plot
Animate the plot with {gganimate}
# Animate the plot, using each case (`rowname`) as one animation state and
# marking the states with a translucent magenta shadow.
growth_plot +
  transition_states(
    rowname,
    transition_length = 1,
    state_length = 1
  ) +
  shadow_mark(color = "Magenta", alpha = .3)
Model 2 - Latent growth model with freely estimated time scores
(level-shape model or latent basis model)
# Model 2: latent growth model in which the first two time scores are fixed
# (0 and 1) and the remaining four are freely estimated (`*`).
# Every Mplus statement below is terminated with ";" -- the ANALYSIS and PLOT
# sections previously ended without one.
m2_growth <- mplusObject(
  TITLE = "m2 growth model freely estimated time scores - Lab 6",
  VARIABLE = "usevar =\nmath_07-math_12; ",
  ANALYSIS = "estimator = ML;",
  # `i` and `s` are the intercept and slope growth factors.
  MODEL = "i s | math_07@0 math_08@1 math_09* math_10* math_11* math_12*; ",
  OUTPUT = "sampstat standardized;",
  PLOT = "type=plot3;\nseries = math_07-math_12(*);",
  usevariables = colnames(lsay_lab6),
  rdata = lsay_lab6
)

# Write the data and input files under mplus_files/ and run the model.
# hashfilename = FALSE keeps the data file name exactly as given in `dataout`.
m2_growth_fit <- mplusModeler(
  m2_growth,
  dataout = here("mplus_files", "Lab6.dat"),
  modelout = here("mplus_files", "m2_growth_Lab6.inp"),
  check = TRUE,
  run = TRUE,
  hashfilename = FALSE
)
Prepare plot data
# Model 2 estimated means read from the Mplus .gh5 plot file, labelled with
# the grade column names stored in `gradelevels`.
mean_est2 <- here("mplus_files", "m2_growth_Lab6.gh5") %>%
  mplus.get.estimated_means() %>%
  as.data.frame() %>%
  mutate(grade = gradelevels)
Plot the model estimated means superimposed on the observed individual values
# Observed individual trajectories (light blue, translucent) with the model 2
# estimated means drawn on top in magenta. Overwrites `growth_plot`.
growth_plot <- ggplot() +
  # Observed values: one point/line series per case (`rowname`).
  geom_point(
    data = plot_obs,
    mapping = aes(x = grade, y = value, group = rowname),
    color = "lightblue",
    alpha = .3
  ) +
  geom_line(
    data = plot_obs,
    mapping = aes(x = grade, y = value, group = rowname),
    color = "lightblue",
    alpha = .3
  ) +
  # Estimated means: group = 1 joins the six means into a single line.
  geom_point(
    data = mean_est2,
    mapping = aes(x = grade, y = V1),
    color = "magenta",
    size = 1.5
  ) +
  geom_line(
    data = mean_est2,
    mapping = aes(x = grade, y = V1, group = 1),
    color = "magenta",
    size = 1.2
  ) +
  # Show grade numbers instead of the variable names on the x axis.
  scale_x_discrete(labels = c("7", "8", "9", "10", "11", "12")) +
  labs(x = "Grade", y = "Math Score") +
  theme_minimal()

growth_plot
Model 3 - Latent growth model with covariate and freely estimated time scores
# Model 3: growth model with freely estimated time scores (math_09-math_12)
# and `fathed` as a grand-mean-centered covariate of both growth factors.
# Every Mplus statement below is terminated with ";" -- the ANALYSIS and PLOT
# sections previously ended without one.
m3_growth <- mplusObject(
  TITLE = "m3 growth model with covariate and freely estimated time scores - Lab 6",
  VARIABLE = "usevar =\nmath_07-math_12 fathed; ",
  ANALYSIS = "estimator = ML;",
  DEFINE = "center fathed (grandmean);",
  # Line 1 defines the growth factors; line 2 regresses them on fathed.
  MODEL = "i s | math_07@0 math_08@1 math_09* math_10* math_11* math_12*;\ni s on fathed; ",
  OUTPUT = "sampstat standardized;",
  PLOT = "type=plot3;\nseries = math_07-math_12(*);",
  usevariables = colnames(lsay_lab6),
  rdata = lsay_lab6
)

# Write the data and input files under mplus_files/ and run the model.
# hashfilename = FALSE keeps the data file name exactly as given in `dataout`.
m3_growth_fit <- mplusModeler(
  m3_growth,
  dataout = here("mplus_files", "Lab6.dat"),
  modelout = here("mplus_files", "m3_growth_Lab6.inp"),
  check = TRUE,
  run = TRUE,
  hashfilename = FALSE
)
Check the path diagram of the model with {semPlot}
## Reading model: /Users/agarber/Desktop/SEM_S20/Lab6_SEM/mplus_files/m3_growth_Lab6.out
# Path diagram for model 3 from the previously read Mplus output
# (`m3_output`), without intercepts or residuals, using solid black edges
# and blank edge labels.
semPaths(
  m3_output,
  "est",
  intercepts = FALSE,
  residuals = FALSE,
  fade = FALSE,
  edge.color = "black",
  edgeLabels = ""
)
HSLS
data example - Academic expectations
Table. HSLS repeated measures
Question stem -Highest level of education expected...
Name | Labels | Levels |
---|---|---|
s1eduexp | 9th grade (2009) | 1 = less HS, 2 = HS, 3 = Associates, 4 = Bach, 5 = Master, 6 = Ph.D |
s2eduexp | 11th grade (2012) | 1 = less HS, 2 = HS, 3 = Associates, 4 = Bach, 5 = Master, 6 = Ph.D |
s4eduexp | 3 years post high school (2016) | 1 = less HS, 2 = HS, 3 = Associates, 4 = Bach, 5 = Master, 6 = Ph.D |
Model 4 - Latent growth model with categorical outcomes
# Model 4: latent growth model for the three HSLS education-expectation
# items, declared as categorical outcomes. Time scores 0, 3 and 7 follow the
# year coding in the embedded Mplus comment (0 = 2009 ... 7 = 2016).
m4_growth <- mplusObject(
  TITLE = "m4 growth model - HSLS - Lab 6",
  VARIABLE = "usevar = s1eduexp-s4eduexp;\ncategorical = s1eduexp-s4eduexp;",
  ANALYSIS = "",
  MODEL = "! 0=09 1=10 2=11 3=12 | 4=13 5=14 6=15 7=16\ni s | s1eduexp@0 s2eduexp@3 s4eduexp@7; ",
  OUTPUT = "sampstat standardized;",
  PLOT = "type=plot3;\nseries = s1eduexp-s4eduexp(*);",
  usevariables = colnames(hsls_rep),
  rdata = hsls_rep
)

# Write the data and input files under mplus_files/ and run the model.
# NOTE(review): `dataout` is the same "Lab6.dat" path used for the LSAY
# models, so this call replaces that file with the HSLS data -- confirm this
# is intended.
m4_growth_fit <- mplusModeler(
  m4_growth,
  dataout = here("mplus_files", "Lab6.dat"),
  modelout = here("mplus_files", "m4_growth_Lab6.inp"),
  check = TRUE,
  run = TRUE,
  hashfilename = FALSE
)
Prepare plot data
# For each of the six response categories, read the estimated probabilities
# for 'process1' from the model 4 .gh5 file and tag them with the category
# number. The category index is passed as both trailing arguments of the
# helper -- presumably a first/last category range; verify against mplus.R.
loop_data <- lapply(seq_len(6), function(category) {
  mplus.get.estimated_probabilities(
    here("mplus_files", "m4_growth_Lab6.gh5"),
    "process1",
    category,
    category
  ) %>%
    as.data.frame() %>%
    mutate(cat = factor(category))
})

# Stack the six per-category data frames into one.
plot_data <- bind_rows(loop_data)
# Observed education-expectation responses. The row number is stored in
# `rowname` before drop_na() so each retained case keeps its identifier.
observed <- hsls_rep %>%
  select(contains("eduexp")) %>%
  rownames_to_column() %>%
  drop_na()

# Up to 100 complete cases for plotting. head() returns at most 100 rows,
# whereas indexing with 1:100 requests rows that may not exist.
obs100 <- head(observed, 100)

# Long format: one row per case and wave, with the wave recoded to 1-3 so it
# can be placed on a continuous x axis.
plot_obs <- obs100 %>%
  pivot_longer(
    s1eduexp:s4eduexp,    # columns gathered together
    names_to = "year",    # new column holding the former column names
    values_to = "value"   # new column holding the responses
  ) %>%
  mutate(year = case_when(
    year == "s1eduexp" ~ 1,
    year == "s2eduexp" ~ 2,
    year == "s4eduexp" ~ 3
  ))

# Names of the three response columns (column 1 is `rowname`).
yearlevels <- colnames(observed[, 2:4])

# Add the wave index to the estimated probabilities.
# Assumes plot_data holds 3 rows (waves) for each of the 6 categories, in
# category order -- TODO confirm against the .gh5 contents.
prob_est <- plot_data %>%
  mutate(year = rep(1:3, 6))
Plot the model estimated probabilities (categorical outcomes)
# Stacked area chart of the model-estimated probability of each expectation
# category (fill) across the three waves.
ggplot(data = prob_est, mapping = aes(x = year, y = V1, fill = cat)) +
  geom_area(alpha = 0.3, size = .4, colour = "black") +
  # Label the three wave indices with grade and year.
  scale_x_continuous(
    breaks = 1:3,
    labels = c("9th grade (2009)", "11th grade (2012)", "3 years post-HS (2016)")
  ) +
  scale_y_continuous("Probability") +
  # Untitled legend with one entry per category.
  scale_fill_discrete(
    "",
    labels = c(
      "< High School", "High School", "Associates",
      "Bachelor", "Masters", "Ph.D"
    )
  ) +
  labs(
    title = "Highest level of education expected",
    subtitle = "High School Longitudinal Study (N=21,758)",
    y = "Probability",
    x = ""
  ) +
  theme_ipsum()
Create an animated plot with {gganimate}
# Individual response trajectories for the plotting sub-sample: one line per
# case (`rowname`) across the three waves.
cat_plot <- ggplot(
  data = plot_obs,
  mapping = aes(x = year, y = value, group = rowname)
) +
  # Jittered points are fully transparent (alpha = 0); only lines are drawn.
  geom_jitter(color = "black", alpha = 0, width = 0.1, height = .3) +
  geom_line(color = "black") +
  scale_x_continuous(
    breaks = 1:3,
    labels = c("9th grade (2009)", "11th grade (2012)", "3 years post-HS (2016)")
  ) +
  # Reversed y axis with the six category codes shown as degree labels.
  scale_y_reverse(
    breaks = 1:6,
    labels = c("< HS", "HS", "AA", "BA/BS", "MA", "Ph.D")
  ) +
  theme_ipsum() +
  theme(panel.grid.minor = element_blank()) +
  labs(
    y = "",
    title = "Highest level of education expected",
    subtitle = "High School Longitudinal Study (N=100, sub-sample)"
  )
# Animate the plot, using each case (`rowname`) as one animation state and
# marking the states with a faint blue shadow.
cat_plot +
  transition_states(
    rowname,
    transition_length = 3,
    state_length = 3
  ) +
  shadow_mark(color = "blue", alpha = .15)
References
Hallquist, M. N., & Wiley, J. F. (2018). MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus. Structural equation modeling: a multidisciplinary journal, 25(4), 621-638.
Ingels, S. J., Pratt, D. J., Herget, D. R., Burns, L. J., Dever, J. A., Ottem, R., … & Leinwand, S. (2011). High School Longitudinal Study of 2009 (HSLS: 09): Base-Year Data File Documentation. NCES 2011-328. National Center for Education Statistics.
Miller, J. D., Hoffer, T., Suchner, R., Brown, K., & Nelson, C. (1992). LSAY codebook. Northern Illinois University.
Muthén, B. O., Muthén, L. K., & Asparouhov, T. (2017). Regression and mediation analysis using Mplus. Los Angeles, CA: Muthén & Muthén.
Muthén, L.K. and Muthén, B.O. (1998-2017). Mplus User’s Guide. Eighth Edition. Los Angeles, CA: Muthén & Muthén
R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL http://www.R-project.org/
Wickham et al., (2019). Welcome to the tidyverse. Journal of Open Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686