Download repository here: \(\color{blue}{\text{https://github.com/garberadamc/SEM-Lab3}}\)
On the Github repository webpage:
fork
your own branch
of the lab repositoryclone or download
menuWithin R-Studio:
Version Control
Git
clone or download
menu on Github pageThe first example utilizes the Vocabulary and Education dataset from the National Opinion Research Center General Social Survey. GSS Cumulative Datafile 1972-2016 (Fox, 2008) \(\color{blue}{\text{See documentation here}}\)
This dataset is available via the R-package {carData
} and can be directly loaded into the R environment.
Note: All models specified in the following exercise are for demonstration only and are not theoretically justified or valid.
# equatiomatic is not yet on CRAN. Install the development version from GitHub with
remotes::install_github("datalorax/equatiomatic", force = TRUE)
library(tidyverse)
library(MplusAutomation)
library(rhdf5)
library(here)
library(gt)
library(gtsummary)
library(estimatr)
library(kableExtra)
library(hrbrthemes)
library(equatiomatic)
library(effects)
library(carData)
library(Ecdat)
library(huxtable)
library(flair)
Read the dataframe into your R-environment from package {carData
}
data(Vocab)
vocab <- as.data.frame(Vocab)
tribble
table
Name | Labels |
---|---|
year | Year of the survey |
sex | Sex of the respondent (Female or Male) |
education | Education, in years |
vocabulary | Vocabulary test score: number correct on a 10-word test |
check some basic descriptives with the {gtsummary
} package
table1 <- tbl_summary(vocab,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
missing = "no" ) %>%
bold_labels()
table1
Characteristic | N = 303511 |
---|---|
year | 1995 (13) |
sex | |
Female | 17148 (56%) |
Male | 13203 (44%) |
education | 13.0 (3.0) |
vocabulary | 6.00 (2.12) |
1
Statistics presented: mean (SD); n (%)
|
Run a regression of the model with interaction of sex
and education
using the lm
function
m1_interact <- lm(formula =
vocabulary ~ sex +
education +
sex:education ,
data=vocab)
Print summary of regression output using package {huxtable
}
huxreg(m1_interact, error_pos = 'right')
(1) | ||
(Intercept) | 1.788 *** | (0.064) |
sexMale | -0.277 ** | (0.094) |
education | 0.329 *** | (0.005) |
sexMale:education | 0.008 | (0.007) |
N | 30351 | |
R2 | 0.231 | |
logLik | -61825.557 | |
AIC | 123661.113 | |
*** p < 0.001; ** p < 0.01; * p < 0.05. |
Print the Latex syntax for the regression equation using the package {equatiomatic
}
extract_eq(m1_interact)
\[ \color{#FF851B}{\text{vocabulary}} = \alpha + \beta_{1}(\color{#0074D9}{\text{sex}_{\text{Male}}}) + \beta_{2}(\color{#2ECC40}{\text{education}}) + \beta_{3}(\color{#0074D9}{\text{sex}_{\text{Male}}} \times \color{#2ECC40}{\text{education}}) + \epsilon \]
extract_eq(m1_interact, use_coefs = TRUE)
\[ \color{#FF851B}{\text{vocabulary}} = 1.79 - 0.28(\color{#0074D9}{\text{sex}_{\text{Male}}}) + 0.33(\color{#2ECC40}{\text{education}}) + 0.01(\color{#0074D9}{\text{sex}_{\text{Male}}} \times \color{#2ECC40}{\text{education}}) + \epsilon \]
Plot the interaction effect using the package {effects
}
int <- effect("sex:education",m1_interact)
plot(int, main="", grid=TRUE,
x.var = "education", xlab="Education",
ylab="Vocabulary", multiline=TRUE, confint=list(style="auto"))
Figure. Example 1 interaction is non-significant (no moderation)
Filter year to include 1974 and 2016 (emphasizing moderation effect)
vocab2 <- vocab %>%
filter(year %in% c(1974, 2016)) %>%
mutate(year = droplevels(factor(year)))
Run regression with interaction between year
and education
# below is R-code to center covariate 'education'
# vocab2 <- vocab %>%
# mutate(education = scale(education, scale = FALSE)) %>%
# mutate(year = scale(year, scale = FALSE))
m2_interact <- lm(formula =
vocabulary ~ sex +
education +
year +
year:education ,
data=vocab2)
huxreg(m2_interact, error_pos = 'right')
(1) | ||
(Intercept) | 1.600 *** | (0.184) |
sexMale | -0.073 | (0.062) |
education | 0.375 *** | (0.015) |
year2016 | 0.373 | (0.272) |
education:year2016 | -0.078 *** | (0.021) |
N | 3307 | |
R2 | 0.244 | |
logLik | -6607.022 | |
AIC | 13226.043 | |
*** p < 0.001; ** p < 0.01; * p < 0.05. |
Print the Latex syntax for the regression equation using the package {equatiomatic
}
extract_eq(m2_interact)
\[ \color{#FF851B}{\text{vocabulary}} = \alpha + \beta_{1}(\color{#0074D9}{\text{sex}_{\text{Male}}}) + \beta_{2}(\color{#2ECC40}{\text{education}}) + \beta_{3}(\color{#39CCCC}{\text{year}_{\text{2016}}}) + \beta_{4}(\color{#2ECC40}{\text{education}} \times \color{#39CCCC}{\text{year}_{\text{2016}}}) + \epsilon \]
extract_eq(m2_interact, use_coefs = TRUE)
\[ \color{#FF851B}{\text{vocabulary}} = 1.6 - 0.07(\color{#0074D9}{\text{sex}_{\text{Male}}}) + 0.38(\color{#2ECC40}{\text{education}}) + 0.37(\color{#39CCCC}{\text{year}_{\text{2016}}}) - 0.08(\color{#2ECC40}{\text{education}} \times \color{#39CCCC}{\text{year}_{\text{2016}}}) + \epsilon \]
Plot the interaction effect using the package {effects
}
int2 <- effect("education:year", m2_interact)
plot(int2, main="", grid=TRUE,
x.var = "education", xlab="Education",
ylab="Vocabulary",
multiline=TRUE,
confint=list(style="auto"))
Figure. Example 2 interaction term is significant (moderation)
education
)year
)vocabulary
)education_2sd <- 2*sqrt(9.85) # 6.28
m1_model <- mplusObject(
TITLE = "m5 model indirect - Lab 3",
VARIABLE =
"usevar =
year ! covariate/moderator
education ! covariate
vocabulary ! outcome
int_yred; ! interaction of year and education",
DEFINE =
"center education (grandmean);
int_yred = year*education; ! create interaction term ",
ANALYSIS =
"estimator = MLR" ,
MODEL =
"[vocabulary](b0);
vocabulary on
year(b1)
education(b2)
int_yred(b3); " ,
MODELCONSTRAINT =
"LOOP(x,-1,1,0.01);
PLOT(y1974 y2016);
new(hi_y1974 lo_y1974 hi_y2016 lo_y2016 diff_hi);
y1974 = b0 + b2*x;
y2016 = b0 + b1 + (b2+b3)*x;
hi_y1974 = b0 + b2*(6.28);
lo_y1974 = b0 + b2*(-6.28);
hi_y2016 = b0 + b1 + (b2 + b3)*(6.28);
lo_y2016 = b0 + b1 + (b2 + b3)*(-6.28);
diff_hi = hi_y2016 - hi_y1974; ",
OUTPUT = "sampstat standardized modindices (3.84)",
PLOT = "type=plot3;",
usevariables = colnames(vocab2),
rdata = vocab2)
m1_model_fit <- mplusModeler(m1_model,
dataout=here("mplus_files", "Lab3.dat"),
modelout=here("mplus_files", "model1_Lab3.inp"),
check=TRUE, run = TRUE, hashfilename = FALSE)
Extract the output parameters generated using the model constraint
simp_slope <- data.frame(m1_model_fit[["results"]][["parameters"]][["unstandardized"]]) %>%
filter(paramHeader == "New.Additional.Parameters") %>%
filter(param!= "DIFF_HI") %>%
select(param, est, se) %>%
mutate(year = case_when(
param %in% c("HI_Y1974", "LO_Y1974") ~ "1974",
param %in% c("HI_Y2016", "LO_Y2016") ~ "2016")) %>%
mutate(education = case_when(
param %in% c("HI_Y1974", "HI_Y2016") ~ 6.28,
param %in% c("LO_Y1974", "LO_Y2016") ~ -6.28))
Plot the interaction effect with ggplot
using theme from {hrbrthemes
} package
# un-center 'education' so values on x-axis are on the original scale
plot_data <- simp_slope %>% mutate(education = education + 12.9)
ggplot(plot_data, aes(x=education, y=est, color=year, group=year)) +
geom_point(size=0) +
geom_line() +
geom_errorbar(aes(ymin=est-se, ymax=est+se),
width=.2) +
scale_x_continuous(breaks = c(seq(6,20,2))) +
labs(title = "Simple Slopes Graph",
subtitle = "Vocabulary test score predicted by years of education in 1974 & 2016",
x = "Education (years)",
y = "Vocabulary test score") +
theme_ipsum()
ggsave(here("figures", "m1_simple_slope.png"), height = 6, width = 8)
The next example utilizes the Effects on Learning of Small Class Sizes (Star) dataset from the Introduction to Econometrics textbook. (Stock et al., 2003) \(\color{blue}{\text{See documentation here}}\)
This dataset is available via the R-package {Ecdat
} and can be directly loaded into the R environment.
Read the dataframe into your R-environment from package {Ecdat
}
data(Star)
star_data <- as.data.frame(Star)
Take a look at the variables in the Star
dataset
Name | Labels |
---|---|
tmathssk | total math scaled score |
treadssk | total reading scaled score |
classk | type of class (small, regular, regular with aide) |
totexpk | years of total teaching experience |
Subset and recode variables to use in moderation model with select
, mutate
, and case_when
mod_data <- star_data %>%
select(totexpk, # years of total teaching experience
classk, # type of class, a factor with levels (regular,small.class,regular.with.aide)
tmathssk, treadssk) %>%
mutate(classk = case_when(
classk == "small.class" ~ "small.class",
classk %in% c("regular.with.aide", "regular") ~ "regular")) %>%
mutate(classk = fct_rev(classk))
totexpk
)classk
)tmathssk
)treadssk
)teach_exp_2sd <- sqrt(33.261) # 5.77
m2_model <- mplusObject(
TITLE = "m2 model indirect - Lab 3",
VARIABLE =
"usevar =
totexpk classk
tmathssk, treadssk
tchXclas; ",
DEFINE =
"center totexpk (grandmean);
tchXclas = totexpk*classk; ! create interaction term" ,
ANALYSIS =
"estimator = mlr; ",
MODEL =
"treadssk on classk totexpk tchXclas;
[tmathssk](b0);
tmathssk on
classk (b1)
totexpk (b2)
tchXclas (b3); ",
MODELCONSTRAINT =
"LOOP(x,-1,1,0.01);
PLOT(small regular);
new(hi_small lo_small hi_regular lo_regular diff_hi);
small = b0 + b2*x;
regular = b0 + b1 + (b2+b3)*x;
hi_small = b0 + b2*(9.3);
lo_small = b0 + b2*(-9.3);
hi_regular = b0 + b1 + (b2 + b3)*(9.3);
lo_regular = b0 + b1 + (b2 + b3)*(-9.3);
diff_hi = hi_small - hi_regular; ",
OUTPUT = "sampstat standardized modindices (3.84)",
PLOT = "type=plot3;",
usevariables = colnames(mod_data),
rdata = mod_data)
m2_model_fit <- mplusModeler(m2_model,
dataout=here("mplus_files", "Lab3_caschools.dat"),
modelout=here("mplus_files", "model2_Lab2.inp"),
check=TRUE, run = TRUE, hashfilename = FALSE)
simp_slope2 <- data.frame(m2_model_fit[["results"]][["parameters"]][["unstandardized"]]) %>%
filter(paramHeader == "New.Additional.Parameters") %>%
filter(param!= "DIFF_HI") %>%
select(param, est, se) %>%
mutate(size = case_when(
param %in% c("HI_SMALL", "LO_SMALL") ~ "Small",
param %in% c("HI_REGUL", "LO_REGUL") ~ "Regular")) %>%
mutate(experience = case_when(
param %in% c("HI_SMALL", "HI_REGUL") ~ 9.3,
param %in% c("LO_SMALL", "LO_REGUL") ~ -9.3))
# un-center 'experience' so values on x-axis are on the original scale
mean_exp <- mean(mod_data$totexpk)
plot_data2 <- simp_slope2 %>% mutate(experience = experience + mean_exp)
ggplot(plot_data2, aes(x=experience, y=est, color=size, group=size)) +
geom_point(size=0) +
geom_line() +
geom_errorbar(aes(ymin=est-se, ymax=est+se), width=.25) +
scale_x_continuous( breaks = c(seq(0,18,2))) +
labs(title = "Simple Slopes Graph",
subtitle = "Math test score predicted by years of teaching experience in small & regular classrooms",
x = "Teaching Experience (years)",
y = "Math test score") +
theme_ipsum()
ggsave(here("figures", "m2_simple_slope.png"), height = 6, width = 8)