Within R-studio under the files pane (bottom right):
New packages this week:
janitor
haven
# rhdf5 is installed through BiocManager rather than install.packages(),
# so make sure BiocManager itself is available first.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("rhdf5")
DATA SOURCE: This lab exercise utilizes the NCES public-use dataset: Education Longitudinal Study of 2002 (Lauff & Ingels, 2014) \(\color{blue}{\text{See website: nces.ed.gov}}\)
library(janitor)
library(tidyverse)
library(haven)
library(MplusAutomation)
library(rhdf5)
library(here)
library(corrplot)
# Import the SPSS file from the project's data/ folder.
lab_data <- read_spss(file = here("data", "els_sub1_spss.sav"))

# Keep only the columns in positions 41 through 55.
school_trouble <- select(lab_data, 41:55)

# Inspect the selected variables with sjPlot's view_df().
sjPlot::view_df(school_trouble)

# Save the subset as a CSV and read it straight back in, so the rest of the
# analysis works from the CSV version of the data.
write_csv(school_trouble, here("data", "school_trouble_data.csv"))
trouble_data <- read_csv(file = here("data", "school_trouble_data.csv"))

# Correlations computed on pairwise-complete observations, drawn as circles
# in the upper triangle only.
cor_matrix <- cor(x = trouble_data, use = "pairwise.complete.obs")
corrplot(corr = cor_matrix, type = "upper", method = "circle")
# Fix the random seed so the calibration/validation split is reproducible.
set.seed(123)

# Half of the rows (rounded down) go to the calibration sample.
smp_size <- floor(nrow(trouble_data) / 2)

# Randomly draw the row indices that make up the calibration sample.
calibrate_smp <- sample.int(nrow(trouble_data), size = smp_size)

# The drawn rows form the calibration set; every remaining row goes to the
# validation set.
calibrate <- trouble_data[calibrate_smp, ]
validate <- trouble_data[-calibrate_smp, ]
# Specify a single Mplus run covering EFA solutions with 1 through 5 factors
# on the calibration sample. The ANALYSIS text is assembled line by line;
# its last line also requests the parallel analysis.
m_efa_1 <- mplusObject(
  TITLE = "School Trouble EFA - LAB 4 DEMO",
  VARIABLE = "usevar = BYS22A-BYS24G;",
  ANALYSIS = paste(
    "type = efa 1 5;",
    "estimator = mlr;",
    "parallel=50; ! run parallel analysis",
    sep = "\n"
  ),
  MODEL = "",
  PLOT = "type = plot3;",
  OUTPUT = "sampstat;",
  usevariables = colnames(calibrate),
  rdata = calibrate
)

# Write the data and input files into efa_mplus/ under fixed (un-hashed)
# file names, then run the model.
m_efa_1_fit <- mplusModeler(
  object = m_efa_1,
  dataout = here("efa_mplus", "lab4_efa1_trouble.dat"),
  modelout = here("efa_mplus", "lab4_efa1_trouble.inp"),
  check = TRUE,
  run = TRUE,
  hashfilename = FALSE
)
# Read back the Mplus output of the 1-5 factor EFA run.
efa_summary <- readModels(here("efa_mplus", "lab4_efa1_trouble.out"))

# Pair the sample eigenvalues with the parallel-analysis averages stored in
# the gh5 part of the results.
x <- list(
  EFA = efa_summary[["gh5"]][["efa"]][["eigenvalues"]],
  Parallel = efa_summary[["gh5"]][["efa"]][["parallel_average"]]
)

# as_tibble() replaces the deprecated as_data_frame().
plot_data <- as_tibble(x)

# Label each row with its position ("1", "2", ...); seq_len() stays correct
# even when there are no rows, unlike 1:nrow().
plot_data <- cbind(Factor = paste0(seq_len(nrow(plot_data))), plot_data)

# Turn the labels into a factor in order of appearance so the x-axis is not
# sorted alphabetically.
plot_data <- plot_data %>%
  mutate(Factor = fct_inorder(Factor))

# Reshape to long format: one row per Factor x Analysis combination.
plot_data_long <- plot_data %>%
  pivot_longer(
    EFA:Parallel,               # the columns being gathered together
    names_to = "Analysis",      # new column holding the former column names
    values_to = "Eigenvalues"   # new column holding the values
  )

# One line per analysis type, eigenvalue plotted against factor number.
plot_data_long %>%
  ggplot(aes(
    y = Eigenvalues,
    x = Factor,
    group = Analysis,
    color = Analysis
  )) +
  geom_point() +
  geom_line() +
  theme_minimal()

# Save the plot created above to the figures/ folder.
ggsave(
  here("figures", "eigenvalue_elbow_rplot.png"),
  dpi = 300, height = 5, width = 7, units = "in"
)
# Run a separate EFA for each number of factors from 1 to 5, writing one
# data/input file pair per model into efa_mplus2/. The fitted objects are
# collected in the list m_efa.
m_efa <- lapply(1:5, function(k) {
  m_efa2 <- mplusObject(
    TITLE = "School Trouble EFA - LAB 4 DEMO",
    VARIABLE = "usevar = BYS22A-BYS24G;",
    # Request exactly k factors; the statement is closed with ";" like the
    # other Mplus statements in this file.
    ANALYSIS = paste0("type=efa ", k, " ", k, ";"),
    MODEL = "",
    PLOT = "type = plot3;",
    OUTPUT = "sampstat;",
    usevariables = colnames(calibrate),
    rdata = calibrate
  )

  # Format only the file name with sprintf(), then build the path with
  # here(): k now appears in both the .dat and the .inp name, and a "%" in
  # the project path can no longer be read as a format specifier.
  mplusModeler(
    m_efa2,
    dataout = here("efa_mplus2", sprintf("efa_%d_trouble.dat", k)),
    modelout = here("efa_mplus2", sprintf("efa_%d_trouble.inp", k)),
    check = TRUE,
    run = TRUE,
    hashfilename = FALSE
  )
})
# Import the HSLS student subsample (SPSS format) from the data/ folder.
hsls_raw <- read_spss(file = here("data", "hsls_16_student_sub_v1.sav"))

# Standardise the column names with clean_names().
hsls_tidy <- clean_names(hsls_raw)

# Columns whose names begin with "x1".
hsls_x1 <- select(hsls_tidy, starts_with("x1"))

# Every column except those whose names end with "sex".
hsls_not_sex <- select(hsls_tidy, !ends_with("sex"))

# Columns whose names contain "sci".
hsls_science <- select(hsls_tidy, contains("sci"))

# Columns whose names contain either "mth" or "math".
hsls_math <- select(hsls_tidy, contains(c("mth", "math")))

# Math and science columns, then drop the ones starting with "x1" and the
# ones ending with "sex" - one filtering step at a time.
hsls_math_sci <- select(hsls_tidy, contains(c("mth", "math", "sci")))
hsls_math_sci <- select(hsls_math_sci, !starts_with("x1"))
hsls_math_sci <- select(hsls_math_sci, !ends_with("sex"))

# Shorten every column name to its first 8 characters.
names(hsls_math_sci) <- str_sub(names(hsls_math_sci), start = 1, end = 8)
# Report how many column names of `df` are repeats of an earlier name.
#
# df: an object with column names (e.g. a data frame).
#
# Prints (and invisibly returns) a message with the number of duplicated
# column names. When every name is unique nothing is printed and the result
# is an invisible NULL.
test.unique <- function(df) {
  col_names <- colnames(df)
  n_dupes <- length(col_names) - length(unique(col_names))

  # Nothing to report when all names are distinct.
  if (n_dupes <= 0) {
    return(invisible(NULL))
  }

  print(paste("There are", n_dupes, " duplicates", sep = " "))
}
# Report how many of the shortened column names collide.
test.unique(hsls_math_sci)

# Position of the first repeated name (0 means all names are unique).
anyDuplicated(names(hsls_math_sci))

# List the column names for inspection.
colnames(hsls_math_sci)
Hallquist, M. N., & Wiley, J. F. (2018). MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus. Structural equation modeling: a multidisciplinary journal, 25(4), 621-638.
Horst, A. (2020). Course & Workshop Materials. GitHub Repositories, https://allisonhorst.github.io/
Muthén, L.K. and Muthén, B.O. (1998-2017). Mplus User’s Guide. Eighth Edition. Los Angeles, CA: Muthén & Muthén
R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL http://www.R-project.org/
Wickham et al., (2019). Welcome to the tidyverse. Journal of Open Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686