This code provides (hopefully) useful code to extract goodness of fit statistics for log-linear models fitted via Poisson regression in R.
- Start with an arbitrary 5x5 contingency table and put it into dataframe format.
table <- matrix(round(runif(25,0,100),0),5) # creates arbitrary table
colnames(table) <- paste0("Y_",seq(1:5)) # Column names
rownames(table) <- paste0("X_",seq(1:5)) # Row names
df <- data.frame(as.table(table)) # transform table into a dataframe
head(df)
- Now, we fit a saturated model and a models of independence on these data.
model.indep <- glm(Freq ~ factor(Var1) + factor(Var2), family=poisson, data=df) # independence model
model.sat <- glm(Freq ~ factor(Var1)*factor(Var2), family=poisson, data=df) # saturated model
- The next step in standard log-linear analyses is to compare these models using various goodness-of-fit statistics (e.g. G2,AIC,BIC). The following function computes some common statistics from the
glm
objects containing the log-linear models.
gof <- function(freq,model) {
n <- sum(freq)
df <- model$df.residual
q <- model$df.null - model$df.residual
G2 <- model$deviance
D <- (sum(abs(freq-exp(predict(model))))/ (2*n))
AIC <- G2 - 2*df
BIC <- G2 + log(n)*q
stats <- c(df,G2,D,AIC,BIC)
names(stats) <- c("df","G2","D","AIC","BIC")
return(round(stats,3))
}
The function returns each model’s degrees of freedom (\(df\)), the log-likelihood ratio (\(G^2\)), dissimilarity index (\(D\)), Akaike information criterion (\(AIC\)) and Bayesian information criterion (\(BIC\)). For example:
# Gofs for independence model
gof(df$Freq,model.indep)
df G2 D AIC BIC
16.000 255.972 0.180 223.972 312.993
# Gofs for saturated model
gof(df$Freq,model.sat)
df G2 D AIC BIC
0.000 0.000 0.000 0.000 171.065
Now it’s easy to put them together into a table:
models <- c("model.indep","model.sat")
table.gof <- NULL # Empty table
for (i in models) {
tgof <- gof(df$Freq, eval(parse(text = i))) # Computes Goodness of fit statistics for each model
table.gof <- rbind(table.gof,tgof) # Piles stats for each model
}
rownames(table.gof) <- c("Independence","Saturated")
print(table.gof)
df G2 D AIC BIC
Independence 16 255.972 0.18 223.972 312.993
Saturated 0 0.000 0.00 0.000 171.065
LS0tCnRpdGxlOiAiR29vZG5lc3Mgb2YgZml0IHN0YXRpc3RpY3MgZm9yIGxvZy1saW5lYXIgbW9kZWxzIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAotLS0KCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCgpUaGlzIGNvZGUgcHJvdmlkZXMgKGhvcGVmdWxseSkgdXNlZnVsIGNvZGUgdG8gZXh0cmFjdCBnb29kbmVzcyBvZiBmaXQgc3RhdGlzdGljcyBmb3IgbG9nLWxpbmVhciBtb2RlbHMgZml0dGVkIHZpYSBQb2lzc29uIHJlZ3Jlc3Npb24gaW4gUi4gCgoxLiBTdGFydCB3aXRoIGFuIGFyYml0cmFyeSA1eDUgY29udGluZ2VuY3kgdGFibGUgYW5kIHB1dCBpdCBpbnRvIGRhdGFmcmFtZSBmb3JtYXQuCgpgYGB7cn0KCXRhYmxlIDwtIG1hdHJpeChyb3VuZChydW5pZigyNSwwLDEwMCksMCksNSkgIyBjcmVhdGVzIGFyYml0cmFyeSB0YWJsZQoJY29sbmFtZXModGFibGUpIDwtIHBhc3RlMCgiWV8iLHNlcSgxOjUpKSAjIENvbHVtbiBuYW1lcwoJcm93bmFtZXModGFibGUpIDwtIHBhc3RlMCgiWF8iLHNlcSgxOjUpKSAjIFJvdyBuYW1lcwoJZGYgPC0gZGF0YS5mcmFtZShhcy50YWJsZSh0YWJsZSkpICMgdHJhbnNmb3JtIHRhYmxlIGludG8gYSBkYXRhZnJhbWUKCgloZWFkKGRmKQpgYGAKCjxicj4KCjIuIE5vdywgd2UgZml0IGEgc2F0dXJhdGVkIG1vZGVsIGFuZCBhIG1vZGVscyBvZiBpbmRlcGVuZGVuY2Ugb24gdGhlc2UgZGF0YS4KCmBgYHtyfQoJbW9kZWwuaW5kZXAgPC0gZ2xtKEZyZXEgfiBmYWN0b3IoVmFyMSkgKyBmYWN0b3IoVmFyMiksIGZhbWlseT1wb2lzc29uLCBkYXRhPWRmKSAjIGluZGVwZW5kZW5jZSBtb2RlbAoJbW9kZWwuc2F0ICAgPC0gZ2xtKEZyZXEgfiBmYWN0b3IoVmFyMSkqZmFjdG9yKFZhcjIpLCBmYW1pbHk9cG9pc3NvbiwgZGF0YT1kZikgICAjIHNhdHVyYXRlZCBtb2RlbApgYGAKCjxicj4KCjMuIFRoZSBuZXh0IHN0ZXAgaW4gc3RhbmRhcmQgbG9nLWxpbmVhciBhbmFseXNlcyBpcyB0byBjb21wYXJlIHRoZXNlIG1vZGVscyB1c2luZyB2YXJpb3VzIGdvb2RuZXNzLW9mLWZpdCBzdGF0aXN0aWNzIChlLmcuIEcyLEFJQyxCSUMpLiBUaGUgZm9sbG93aW5nIGZ1bmN0aW9uIGNvbXB1dGVzIHNvbWUgY29tbW9uIHN0YXRpc3RpY3MgZnJvbSB0aGUgYGBgZ2xtYGBgIG9iamVjdHMgY29udGFpbmluZyB0aGUgbG9nLWxpbmVhciBtb2RlbHMuICAgCgpgYGB7cn0KCWdvZiA8LSBmdW5jdGlvbihmcmVxLG1vZGVsKSB7CgkJbiAgICA8LSBzdW0oZnJlcSkgCgkJZGYgICA8LSBtb2RlbCRkZi5yZXNpZHVhbAoJCXEgICAgPC0gbW9kZWwkZGYubnVsbCAtIG1vZGVsJGRmLnJlc2lkdWFsCgkJRzIgICA8LSBtb2RlbCRkZXZpYW5jZQoJCUQgCSA8LSAoc3VtKGFicyhmcmVxLWV4cChwcmVkaWN0KG1vZGVsKSkpKS8gKDIqbikpCgkJQUlDICA8LSBHMiAtIDIqZGYKCQlCSUMgIDwtIEcyICsgbG9nKG4pKnEKCQlzdGF0cyA8LSBjKGRmLEcyLEQsQUlDLEJJQykKCQluYW1lcyhzdGF0cykgPC0gYygiZGYiLCJHMiIsIkQiLCJBSUMiLCJCSUMiKQoJCXJldHVybihyb3VuZChzdGF0cywzKSkKCX0KYGBgCgpUaGUgZnVuY3Rpb24gcmV0dXJucyBlYWNoIG1vZGVsJ3MgZGVncmVlcyBvZiBmcmVlZG9tICgkZGYkKSwgdGhlIGxvZy1saWtlbGlob29kIHJhdGlvICgkR14yJCksIGRpc3NpbWlsYXJpdHkgaW5kZXggKCREJCksIEFrYWlrZSBpbmZvcm1hdGlvbiBjcml0ZXJpb24gKCRBSUMkKSBhbmQgQmF5ZXNpYW4gaW5mb3JtYXRpb24gY3JpdGVyaW9uICgkQklDJCkuIEZvciBleGFtcGxlOgoKYGBge3J9CiMgR29mcyBmb3IgaW5kZXBlbmRlbmNlIG1vZGVsCmdvZihkZiRGcmVxLG1vZGVsLmluZGVwKQoKIyBHb2ZzIGZvciBzYXR1cmF0ZWQgbW9kZWwKZ29mKGRmJEZyZXEsbW9kZWwuc2F0KQoKYGBgCgpOb3cgaXQncyBlYXN5IHRvIHB1dCB0aGVtIHRvZ2V0aGVyIGludG8gYSB0YWJsZToKCmBgYHtyfQoJbW9kZWxzIDwtIGMoIm1vZGVsLmluZGVwIiwibW9kZWwuc2F0IikKCXRhYmxlLmdvZiAgIDwtIE5VTEwgIyBFbXB0eSB0YWJsZQoKCglmb3IgKGkgaW4gbW9kZWxzKSB7CgkgIHRnb2YgPC0gZ29mKGRmJEZyZXEsIGV2YWwocGFyc2UodGV4dCA9IGkpKSkgIyBDb21wdXRlcyBHb29kbmVzcyBvZiBmaXQgc3RhdGlzdGljcyBmb3IgZWFjaCBtb2RlbAoJCXRhYmxlLmdvZiA8LSByYmluZCh0YWJsZS5nb2YsdGdvZikgICMgUGlsZXMgc3RhdHMgZm9yIGVhY2ggbW9kZWwKCX0KCglyb3duYW1lcyh0YWJsZS5nb2YpIDwtIGMoIkluZGVwZW5kZW5jZSIsIlNhdHVyYXRlZCIpCglwcmludCh0YWJsZS5nb2YpIApgYGAKCgoKCg==