Uplift and SPRINT
library(uplift)
x <- read.csv("sprint baseline and outcomes corrected.csv", as.is=TRUE)
x$alive <- 1 - x$event_mi
col2fac <- c("smoke_3cat", "aspirin", "female", "race4", "statin")
x[col2fac] <- lapply(x[col2fac], factor)
cols <- c("alive", "intensive",
"risk10yrs", "sbp", "dbp", "n_agents",
"egfr", "screat", "age", "chr", "glur",
"hdl", "trr", "umalcr", "bmi",
"smoke_3cat", "aspirin", "female", "race4", "statin")
x <- x[,cols]
x <- x[complete.cases(x),]set.seed(1)
samp <- sample.int(nrow(x))
train <- samp[1:7000]
fit1 <- upliftRF(alive ~
risk10yrs +
sbp + dbp + n_agents + smoke_3cat +
aspirin + egfr + screat +
age + female + race4 +
chr + glur + hdl + trr + umalcr +
bmi + statin
+ trt(intensive),
data = x[train,],
mtry = 7,
ntree = 60,
split_method = "KL",
minsplit = 20,
verbose = FALSE)
vals <- predict(fit1, x[train,])
upliftscores <- vals[,1] - vals[,2]
hist(upliftscores, breaks=20)
Permutation Test:
x$alive.random <- sample(x$alive)
fit2 <- upliftRF(alive.random ~
risk10yrs +
sbp + dbp + n_agents + smoke_3cat +
aspirin + egfr + screat +
age + female + race4 +
chr + glur + hdl + trr + umalcr +
bmi + statin
+ trt(intensive),
data = x[train,],
mtry = 7,
ntree = 60,
split_method = "KL",
minsplit = 20,
verbose = FALSE)
vals.random <- predict(fit2, x[train,])
upliftscores.random <- vals.random[,1] - vals.random[,2]
hist(upliftscores.random, breaks=20)
Uplift Graph
o <- order(upliftscores, decreasing=TRUE)
avoided <- rep(NA, nrow(vals))
counts <- rep(0, 2) # num alive in ctrl, num alive in trt
denom <- rep(0, 2) # total in ctrl, num total in trt
for (target in 1:nrow(vals)) {
if (x$intensive[train[o[target]]] == 1) {
counts[2] <- counts[2] + x$alive[train[o[target]]]
denom[2] <- denom[2]+1
} else {
counts[1] <- counts[1] + x$alive[train[o[target]]]
denom[1] <- denom[1]+1
}
fracs <- counts/denom
avoided[target] <- (fracs[2]-fracs[1])*target
}
o <- order(upliftscores.random, decreasing=TRUE)
avoided.random <- rep(NA, nrow(vals.random))
counts <- rep(0, 2) # num alive in ctrl, num alive in trt
denom <- rep(0, 2) # total in ctrl, num total in trt
for (target in 1:nrow(vals.random)) {
if (x$intensive[train[o[target]]] == 1) {
counts[2] <- counts[2] + x$alive.random[train[o[target]]]
denom[2] <- denom[2]+1
} else {
counts[1] <- counts[1] + x$alive.random[train[o[target]]]
denom[1] <- denom[1]+1
}
fracs <- counts/denom
avoided.random[target] <- (fracs[2]-fracs[1])*target
}plot(rev(avoided), type="l", lwd=3, col="red")
points(rev(avoided.random), type="l", lwd=3, col="blue")