Simulate Response Patterns Under the Framework of IRT Using the mirt Package

Categories: R, Simulation, mirt
Author: Jihong Zhang
Published: April 15, 2024
1 Research question

How do different methods of collapsing item response categories affect parameter estimation accuracy? Two collapsing schemes are compared against the original five-category responses:
Method 1 (tail merge): merge the first two categories.
Method 2 (middle merge): merge the middle category into a neighboring category (see the recoding sketch below).
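To make the two schemes concrete, here is a minimal recoding sketch, assuming five response categories coded 0-4 as in the simulation code later in this post; the vector x and the lookup-table idiom are illustrative additions, not code from the original simulation.

x <- c(0, 1, 2, 3, 4)              # one response per original category

# Method 1 (tail merge): 0 and 1 collapse into 0, higher categories shift down
x_tail <- c(0, 0, 1, 2, 3)[x + 1]  # 0 0 1 2 3

# Method 2 (middle merge): 1 and 2 collapse into 1, higher categories shift down
x_mid <- c(0, 1, 1, 2, 3)[x + 1]   # 0 1 1 2 3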
For mirt::simdata(), we need to pre-specify a (the item slopes) and d (the item intercepts); mu (the factor means) and sigma (the factor correlation matrix) are optional.
library(mirt)

# Simulation for 6 five-category items
set.seed(1234)
J <- 6    # number of items
N <- 1000 # number of participants
a <- matrix(rlnorm(J, mean = 0, sd = 1), ncol = 1)
a
# For the graded model, ensure that there is enough space between the intercepts,
# otherwise closer categories will not be selected often (minimum distance of 0.3 here)
diffs <- t(apply(matrix(runif(J * 4, .3, 1), nrow = J), 1, cumsum))
d <- -diffs + rnorm(J) + 4
d
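The step that simulates the response data, applies the two collapsing schemes, and fits the three models appears to have been lost in extraction. The sketch below reconstructs it from the parSim block later in the post, so the objects (dat1, mod1, mod1_param, and so on) match the evaluation code that follows.

# Simulate 5-category graded responses from the true parameters
dat1 <- simdata(a, d, N, itemtype = rep('graded', J))

# Model 2 data: tail merge (categories 0 and 1 collapsed)
dat2 <- dat1
dat2[dat2 %in% 0:1] <- 0
dat2[dat2 == 2] <- 1
dat2[dat2 == 3] <- 2
dat2[dat2 == 4] <- 3

# Model 3 data: middle merge (categories 1 and 2 collapsed)
dat3 <- dat1
dat3[dat3 %in% 1:2] <- 1
dat3[dat3 == 3] <- 2
dat3[dat3 == 4] <- 3

# Fit a unidimensional graded response model to each data set
mod1 <- mirt(dat1, model = 1)  # population model
mod1_param <- coef(mod1, simplify = TRUE)$items
mod2 <- mirt(dat2, model = 1)  # tail-merge model
mod2_param <- coef(mod2, simplify = TRUE)$items
mod3 <- mirt(dat3, model = 1)  # middle-merge model
mod3_param <- coef(mod3, simplify = TRUE)$items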
# Mean absolute difference between true and estimated slopes
# (reported as "Bias" in the table below)
rev_bias <- function(true, est) {
  diffs <- abs(true - est)
  mean(diffs)
}

# Root mean square error
rmse <- function(true, est) {
  sqrt(mean((true - est)^2))
}

Results <- data.frame(
  Bias = c(
    rev_bias(true = as.numeric(a), est = mod1_param[, 'a1']),
    rev_bias(true = as.numeric(a), est = mod2_param[, 'a1']),
    rev_bias(true = as.numeric(a), est = mod3_param[, 'a1'])
  ),
  RMSE = c(
    rmse(true = as.numeric(a), est = mod1_param[, 'a1']),
    rmse(true = as.numeric(a), est = mod2_param[, 'a1']),
    rmse(true = as.numeric(a), est = mod3_param[, 'a1'])
  )
)
rownames(Results) <- paste0("Model ", 1:3)
kableExtra::kable(Results, digits = 3)
          Bias    RMSE
Model 1   0.138   0.181
Model 2   0.148   0.192
Model 3   0.144   0.199
5 Item fit
Model 1 (the population model) appears to have the best overall item fit. Model 2 (the tail-merge model) has the largest number of poorly fitting items, while Model 3 (the middle-merge model) contains the single worst-fitting item (item 6).
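The item-fit statistics behind these observations are not shown in the extracted text; a sketch of how they could be computed with mirt's itemfit() (default S-X2 statistic), assuming the fitted objects mod1-mod3 from above:

# S-X2 item-fit statistics per item; large statistics and small
# p-values flag poorly fitting items
itemfit(mod1)  # population model
itemfit(mod2)  # tail-merge model
itemfit(mod3)  # middle-merge model

The post then scales this comparison up to a full simulation across test lengths and sample sizes using parSim: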
library(parSim)

Results <- parSim(
  # Simulation conditions
  J = c(6, 10, 20),
  N = c(300, 500, 1000),
  # Number of repetitions
  reps = 100, # more is better!
  # Parallel?
  nCores = 8,
  export = c("rev_bias"),
  expression = {
    library(mirt)
    a <- matrix(rlnorm(J, mean = 0, sd = 1), ncol = 1)
    diffs <- t(apply(matrix(runif(J * 4, .3, 1), nrow = J), 1, cumsum))
    d <- -diffs + rnorm(J) + 4
    dat1 <- simdata(a, d, N, itemtype = rep('graded', J))

    # Tail merge: collapse categories 0 and 1
    dat2 <- dat1
    dat2[dat2 %in% 0:1] <- 0
    dat2[dat2 == 2] <- 1
    dat2[dat2 == 3] <- 2
    dat2[dat2 == 4] <- 3

    # Middle merge: collapse categories 1 and 2
    dat3 <- dat1
    dat3[dat3 %in% 1:2] <- 1
    dat3[dat3 == 3] <- 2
    dat3[dat3 == 4] <- 3

    ## Population model
    mod1 <- mirt(dat1, model = 1)
    mod1_param <- coef(mod1, simplify = TRUE)$items
    mod1_gfi <- M2(mod1, "C2")

    ## Tail-merge model: model 2
    mod2 <- mirt(dat2, model = 1)
    mod2_param <- coef(mod2, simplify = TRUE)$items
    mod2_gfi <- M2(mod2, "C2")

    ## Middle-category-merge model: model 3
    mod3 <- mirt(dat3, model = 1)
    mod3_param <- coef(mod3, simplify = TRUE)$items
    mod3_gfi <- M2(mod3, "C2")

    ## Output
    data.frame(
      bias_mod1 = rev_bias(true = as.numeric(a), est = mod1_param[, 'a1']),
      bias_mod2 = rev_bias(true = as.numeric(a), est = mod2_param[, 'a1']),
      bias_mod3 = rev_bias(true = as.numeric(a), est = mod3_param[, 'a1']),
      RMSEA_mod1 = mod1_gfi$RMSEA,
      RMSEA_mod2 = mod2_gfi$RMSEA,
      RMSEA_mod3 = mod3_gfi$RMSEA,
      M2_mod1 = mod1_gfi$M2,
      M2_mod2 = mod2_gfi$M2,
      M2_mod3 = mod3_gfi$M2,
      SRMSR_mod1 = mod1_gfi$SRMSR,
      SRMSR_mod2 = mod2_gfi$SRMSR,
      SRMSR_mod3 = mod3_gfi$SRMSR,
      TLI_mod1 = mod1_gfi$TLI,
      TLI_mod2 = mod2_gfi$TLI,
      TLI_mod3 = mod3_gfi$TLI
    )
  }
)
library(ggplot2)
library(dplyr)
library(tidyr)

Results_plot <- Results |>
  pivot_longer(
    c(starts_with("bias"), starts_with("RMSEA"),
      starts_with("SRMSR"), starts_with("TLI")),
    names_to = 'Model', values_to = 'Value'
  ) |>
  separate(Model, into = c("Measure", "Model")) |>
  group_by(J, N, Model, Measure) |>
  summarise(Value = mean(Value, na.rm = TRUE)) |>
  mutate(N = factor(N, levels = c(300, 500, 1000)))

Results_plot |>
  ggplot() +
  geom_path(aes(x = N, y = Value, col = Model, group = Model)) +
  geom_point(aes(x = N, y = Value, col = Model)) +
  facet_grid(Measure ~ J, scales = "free") +
  theme_bw()
8 Conclusion
Under the conditions in this simulation, global model fit differs little across the category-collapsing methods.
Increasing test length and sample size improves estimation accuracy for both collapsing methods.
Item fit, however, does differ across collapsing methods.
Global goodness-of-fit statistics therefore may not perform well for evaluating how item response categories are collapsed.