Read data

load("nhanes_bmi.RData")
nhanes_bmi

Check covariates balance

n <- table(nhanes_bmi$School_meal)
z <- nhanes_bmi$School_meal
y <- nhanes_bmi$BMI
x <- as.matrix(nhanes_bmi[, -c(1 , 2)])
data_stat <- data.frame(t(apply(x, 2, function(x) c(mean(x[z==0]), sd(x[z==0]), mean(x[z==1]), sd(x[z==1])))))
colnames(data_stat) <- c("Mean control", "S.D. control", "Mean treated", "S.D. treated")
data_stat$t_stat <- (data_stat[, 3] - data_stat[, 1])/sqrt(data_stat[, 2]^2/n[1] + data_stat[, 4]^2/n[2])
signif(data_stat, 2)

Visualize coviariates balance

library("ggplot2")
mm <- data_stat[, 3] - data_stat[, 1]
sd_mm <- sqrt(data_stat[, 2]^2/n[1] + data_stat[, 4]^2/n[2])
temp <- data.frame(x = rownames(data_stat), y = mm, low = mm - 1.96 * sd_mm, 
                   high = mm + 1.96 * sd_mm)
ggplot(temp, aes(x, y)) + geom_point() + geom_hline(yintercept = 0) + geom_errorbar(aes(ymin = low, ymax = high))+ xlab("") + ylab("") + theme_classic() + theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) 

Estimate propensity score

library(dplyr)
pscore <- glm (z ~ x , family = binomial)$fitted.values

temp <- data.frame(group = factor(z), pscore = pscore)
p <- ggplot(temp, aes(x=pscore, fill=group)) + geom_histogram(alpha=0.6, position = 'identity')
p

Check covariate balancing after stratification K = 5

NeymanSRE <- function(W, Y, strata.labels) {
  n <- length(W)
  groups <- unique(strata.labels)
  ests <- sapply(groups, function(gg) {
   wt <- sum(strata.labels == gg)/n
   est <- mean(Y[strata.labels == gg & W == 1]) - mean(Y[strata.labels == gg & W == 0])
   est.var <- var(Y[strata.labels == gg & W == 1])/sum(strata.labels == gg & W == 1) + 
     var(Y[strata.labels == gg & W == 0])/sum(strata.labels == gg & W == 0)
   return(c(wt, est, est.var))
 })
# print(ests)
 neyman.est <- sum(ests[1, ] * ests[2, ])
 neyman.var <- sum(ests[1, ]^2 * ests[3, ])
 return(c(est = neyman.est, se = sqrt(neyman.var)))
}

nn <- 5
q.pscore <- quantile(pscore , (1:( nn -1)) / nn)
ps.strata <- cut(pscore, breaks = c(0 , q.pscore ,1), labels = 1:nn)
balance_check <- apply(x, 2, function(v) NeymanSRE(z, v, ps.strata)) 


temp <- data.frame(x = colnames(x), y = balance_check[1, ], 
                   low = balance_check[1, ] - 1.96 * balance_check[2, ], 
                   high = balance_check[1, ] + 1.96 * balance_check[2, ])
ggplot(temp, aes(x, y)) + geom_point() + geom_hline(yintercept = 0) + geom_errorbar(aes(ymin = low, ymax = high))+ xlab("") + ylab("") + theme_classic() + theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) 

Stratification based on propensity score

n.strata <- c(5 , 10 , 20 , 50 , 80)
strat.res <- sapply(n.strata , function(nn){
  q.pscore <- quantile(pscore , (1:( nn -1)) / nn)
  ps.strata <- cut(pscore, breaks = c(0 , q.pscore ,1), labels = 1:nn)
  NeymanSRE(z, y, ps.strata)})

colnames(strat.res) <- n.strata
round(strat.res, 3)
         5     10     20     50     80
est -0.116 -0.178 -0.200 -0.265 -0.204
se   0.283  0.282  0.279  0.272     NA
LS0tCnRpdGxlOiAiUiBleGFtcGxlIDVfMTogc2Nob29sIG1lYWwgcHJvZ3JhbSBkYXRhIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpSZWFkIGRhdGEKCmBgYHtyfQpsb2FkKCJuaGFuZXNfYm1pLlJEYXRhIikKbmhhbmVzX2JtaQpgYGAKCgpDaGVjayBjb3ZhcmlhdGVzIGJhbGFuY2UKYGBge3J9Cm4gPC0gdGFibGUobmhhbmVzX2JtaSRTY2hvb2xfbWVhbCkKeiA8LSBuaGFuZXNfYm1pJFNjaG9vbF9tZWFsCnkgPC0gbmhhbmVzX2JtaSRCTUkKeCA8LSBhcy5tYXRyaXgobmhhbmVzX2JtaVssIC1jKDEgLCAyKV0pCmRhdGFfc3RhdCA8LSBkYXRhLmZyYW1lKHQoYXBwbHkoeCwgMiwgZnVuY3Rpb24oeCkgYyhtZWFuKHhbej09MF0pLCBzZCh4W3o9PTBdKSwgbWVhbih4W3o9PTFdKSwgc2QoeFt6PT0xXSkpKSkpCmNvbG5hbWVzKGRhdGFfc3RhdCkgPC0gYygiTWVhbiBjb250cm9sIiwgIlMuRC4gY29udHJvbCIsICJNZWFuIHRyZWF0ZWQiLCAiUy5ELiB0cmVhdGVkIikKZGF0YV9zdGF0JHRfc3RhdCA8LSAoZGF0YV9zdGF0WywgM10gLSBkYXRhX3N0YXRbLCAxXSkvc3FydChkYXRhX3N0YXRbLCAyXV4yL25bMV0gKyBkYXRhX3N0YXRbLCA0XV4yL25bMl0pCnNpZ25pZihkYXRhX3N0YXQsIDIpCmBgYAoKVmlzdWFsaXplIGNvdmlhcmlhdGVzIGJhbGFuY2UKCmBgYHtyfQpsaWJyYXJ5KCJnZ3Bsb3QyIikKbW0gPC0gZGF0YV9zdGF0WywgM10gLSBkYXRhX3N0YXRbLCAxXQpzZF9tbSA8LSBzcXJ0KGRhdGFfc3RhdFssIDJdXjIvblsxXSArIGRhdGFfc3RhdFssIDRdXjIvblsyXSkKdGVtcCA8LSBkYXRhLmZyYW1lKHggPSByb3duYW1lcyhkYXRhX3N0YXQpLCB5ID0gbW0sIGxvdyA9IG1tIC0gMS45NiAqIHNkX21tLCAKICAgICAgICAgICAgICAgICAgIGhpZ2ggPSBtbSArIDEuOTYgKiBzZF9tbSkKZ2dwbG90KHRlbXAsIGFlcyh4LCB5KSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAwKSArIGdlb21fZXJyb3JiYXIoYWVzKHltaW4gPSBsb3csIHltYXggPSBoaWdoKSkrIHhsYWIoIiIpICsgeWxhYigiIikgKyB0aGVtZV9jbGFzc2ljKCkgKyB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCB2anVzdCA9IDAuNSwgaGp1c3Q9MSkpIApgYGAKCgpFc3RpbWF0ZSBwcm9wZW5zaXR5IHNjb3JlCgpgYGB7cn0KbGlicmFyeShkcGx5cikKcHNjb3JlIDwtIGdsbSAoeiB+IHggLCBmYW1pbHkgPSBiaW5vbWlhbCkkZml0dGVkLnZhbHVlcwoKdGVtcCA8LSBkYXRhLmZyYW1lKGdyb3VwID0gZmFjdG9yKHopLCBwc2NvcmUgPSBwc2NvcmUpCnAgPC0gZ2dwbG90KHRlbXAsIGFlcyh4PXBzY29yZSwgZmlsbD1ncm91cCkpICsgZ2VvbV9oaXN0b2dyYW0oYWxwaGE9MC42LCBwb3NpdGlvbiA9ICdpZGVudGl0eScpCnAKYGBgCgoKQ2hlY2sgY292YXJpYXRlIGJhbGFuY2luZyBhZnRlciBzdHJhdGlmaWNhdGlvbiBLID0gNQpgYGB7cn0KTmV5bWFuU1JFIDwtIGZ1bmN0aW9uKFcsIFksIHN0cmF0YS5sYWJlbHMpIHsKICBuIDwtIGxlbmd0aChXKQogIGdyb3VwcyA8LSB1bmlxdWUoc3RyYXRhLmxhYmVscykKICBlc3RzIDwtIHNhcHBseShncm91cHMsIGZ1bmN0aW9uKGdnKSB7CiAgIHd0IDwtIHN1bShzdHJhdGEubGFiZWxzID09IGdnKS9uCiAgIGVzdCA8LSBtZWFuKFlbc3RyYXRhLmxhYmVscyA9PSBnZyAmIFcgPT0gMV0pIC0gbWVhbihZW3N0cmF0YS5sYWJlbHMgPT0gZ2cgJiBXID09IDBdKQogICBlc3QudmFyIDwtIHZhcihZW3N0cmF0YS5sYWJlbHMgPT0gZ2cgJiBXID09IDFdKS9zdW0oc3RyYXRhLmxhYmVscyA9PSBnZyAmIFcgPT0gMSkgKyAKICAgICB2YXIoWVtzdHJhdGEubGFiZWxzID09IGdnICYgVyA9PSAwXSkvc3VtKHN0cmF0YS5sYWJlbHMgPT0gZ2cgJiBXID09IDApCiAgIHJldHVybihjKHd0LCBlc3QsIGVzdC52YXIpKQogfSkKIyBwcmludChlc3RzKQogbmV5bWFuLmVzdCA8LSBzdW0oZXN0c1sxLCBdICogZXN0c1syLCBdKQogbmV5bWFuLnZhciA8LSBzdW0oZXN0c1sxLCBdXjIgKiBlc3RzWzMsIF0pCiByZXR1cm4oYyhlc3QgPSBuZXltYW4uZXN0LCBzZSA9IHNxcnQobmV5bWFuLnZhcikpKQp9CgpubiA8LSA1CnEucHNjb3JlIDwtIHF1YW50aWxlKHBzY29yZSAsICgxOiggbm4gLTEpKSAvIG5uKQpwcy5zdHJhdGEgPC0gY3V0KHBzY29yZSwgYnJlYWtzID0gYygwICwgcS5wc2NvcmUgLDEpLCBsYWJlbHMgPSAxOm5uKQpiYWxhbmNlX2NoZWNrIDwtIGFwcGx5KHgsIDIsIGZ1bmN0aW9uKHYpIE5leW1hblNSRSh6LCB2LCBwcy5zdHJhdGEpKSAKCgp0ZW1wIDwtIGRhdGEuZnJhbWUoeCA9IGNvbG5hbWVzKHgpLCB5ID0gYmFsYW5jZV9jaGVja1sxLCBdLCAKICAgICAgICAgICAgICAgICAgIGxvdyA9IGJhbGFuY2VfY2hlY2tbMSwgXSAtIDEuOTYgKiBiYWxhbmNlX2NoZWNrWzIsIF0sIAogICAgICAgICAgICAgICAgICAgaGlnaCA9IGJhbGFuY2VfY2hlY2tbMSwgXSArIDEuOTYgKiBiYWxhbmNlX2NoZWNrWzIsIF0pCmdncGxvdCh0ZW1wLCBhZXMoeCwgeSkpICsgZ2VvbV9wb2ludCgpICsgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMCkgKyBnZW9tX2Vycm9yYmFyKGFlcyh5bWluID0gbG93LCB5bWF4ID0gaGlnaCkpKyB4bGFiKCIiKSArIHlsYWIoIiIpICsgdGhlbWVfY2xhc3NpYygpICsgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgdmp1c3QgPSAwLjUsIGhqdXN0PTEpKSAKYGBgCgpTdHJhdGlmaWNhdGlvbiBiYXNlZCBvbiBwcm9wZW5zaXR5IHNjb3JlCmBgYHtyfQpuLnN0cmF0YSA8LSBjKDUgLCAxMCAsIDIwICwgNTAgLCA4MCkKc3RyYXQucmVzIDwtIHNhcHBseShuLnN0cmF0YSAsIGZ1bmN0aW9uKG5uKXsKICBxLnBzY29yZSA8LSBxdWFudGlsZShwc2NvcmUgLCAoMTooIG5uIC0xKSkgLyBubikKICBwcy5zdHJhdGEgPC0gY3V0KHBzY29yZSwgYnJlYWtzID0gYygwICwgcS5wc2NvcmUgLDEpLCBsYWJlbHMgPSAxOm5uKQogIE5leW1hblNSRSh6LCB5LCBwcy5zdHJhdGEpfSkKCmNvbG5hbWVzKHN0cmF0LnJlcykgPC0gbi5zdHJhdGEKcm91bmQoc3RyYXQucmVzLCAzKQpgYGAKCgo=