Model evaluation

Predicting heart transplant success

© University of Sydney

Heart transplant

Transplant rejection of patients that were on the waiting list for the Stanford heart transplant program.

  • Score developed to assign hearts for transplant.

  • Is the score effective?

We have patients that reject heart within one year and survive without a rejection after one year.

J Crowley and M Hu (1977), Covariance analysis of heart transplant survival data. Journal of the American Statistical Association, 72, 27-36.

Explore

## Warning: package 'readr' was built under R version 4.1.3
heart = read.csv("https://wimr-genomics.vip.sydney.edu.au/AMED3002/data/heart.csv")
heart$outcome <- rep(NA,nrow(heart))
heart$outcome[heart$futime/365>=1 & heart$reject==0] <- 'no'
heart$outcome[heart$futime/365<1 & heart$reject==1] <- 'yes'
heart$outcome <- factor(heart$outcome)

heart = na.omit(heart)

table(heart$outcome)
## 
##  no yes 
##  19  23
boxplot(mscore~outcome,heart, ylab = 'Match score',xlab = 'Rejection')

Model selection

fit0 <- glm(outcome~1,heart,family = binomial)
fitFull <- glm(outcome~surgery+age+wait.time+mismatch+hla.a2+mscore,heart,family = binomial)

fit = step(glm(fit0,heart,family = binomial),
           scope = list(lower=fit0, upper=fitFull),
           direction = "forward", trace  = 0)

summary(fit)
## 
## Call:
## glm(formula = outcome ~ age + surgery + mscore, family = binomial, 
##     data = heart)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1036  -0.7670   0.3920   0.8025   1.8747  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -7.37758    3.04074  -2.426   0.0153 *
## age          0.13604    0.06027   2.257   0.0240 *
## surgery     -1.56404    0.96117  -1.627   0.1037  
## mscore       1.17452    0.78748   1.491   0.1358  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 57.843  on 41  degrees of freedom
## Residual deviance: 42.788  on 38  degrees of freedom
## AIC: 50.788
## 
## Number of Fisher Scoring iterations: 4

Resubstitution error

pred <- predict(fit, heart)
pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))
predSub <- pred
mean(predSub!=heart$outcome)
## [1] 0.2380952

5-fold cross validation

  • Create a fold vector.

  • Create somewhere to store predictions.

  • For each fold…

    1. Extract that fold as a test set, the remainder as a training set.
    2. Fit classifier on the training set.
    3. Predict class of test set with classifier.
    4. Add this prediction to the storage vector.
  • Calculate error/accuracy by comparing predictions to true class.

5-fold cross validation

## 5-fold cross validation with feature selection inside for-loop

set.seed(51773)
n <- nrow(heart) # The number of observations
nFolds <- 5


# Create a vector with the same amount of fold labels
fold <- rep(1:nFolds, 100) 
fold <- fold[1:n]


# Reorder these to avoid systematic ordering bias
foldSample <- sample(fold, n, replace = FALSE)


# Create something to store errors.
predCV <- factor(rep(NA, n), levels = c("no", "yes"))


for(i in 1:nFolds){
# Split into training and test set    
dataTest <- heart[foldSample == i, ]
dataTrain <- heart[foldSample != i, ]

# Fit model on dataTrain
fit0 <- glm(outcome~1, dataTrain, family = binomial)
fitFull <- glm(outcome~surgery+age+wait.time+mismatch+hla.a2+mscore, dataTrain,family = binomial)

fit = step(glm(fit0, dataTrain, family = binomial),
           scope = list(lower=fit0, upper=fitFull),
           direction = "forward", trace  = 0)

# Predict on dataTest
pred <- predict(fit, dataTest)
pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))

# Save test predictions
predCV[foldSample == i] <- pred
}

mean(predCV != heart$outcome)
## [1] 0.3571429

Repeated 5-fold cross validation

## Repeated 5-fold cross validation with feature selection inside for-loop

set.seed(51773)
n <- nrow(heart) # The number of observations
nFolds <- 5
nRep <- 100

# Create a vector with the same amount of fold labels
fold <- rep(1:nFolds, 100) 
fold <- fold[1:n]

# Create matrix to store repeated CV error rates
errorRepCV <- matrix(NA, nRep, n)

for(j in 1:nRep){
# Reorder these to avoid systematic ordering bias
    foldSample <- sample(fold, n, replace = FALSE)

    predCVLoop <- factor(rep(NA, n), levels = c("no", "yes"))

    for(i in 1:nFolds){
        # Split into training and test set    
        dataTest <- heart[foldSample == i, ]
        dataTrain <- heart[foldSample != i, ]

        # Fit model on dataTrain
        fit0 <- glm(outcome~1, dataTrain, family = binomial)
        fitFull <- glm(outcome~surgery+age+wait.time+mismatch+hla.a2+mscore, 
                       dataTrain,family = binomial)

        fit = step(glm(fit0, dataTrain, family = binomial),
                    scope = list(lower=fit0, upper=fitFull),
                    direction = "forward", trace  = 0)

        # Predict on dataTest
        pred <- predict(fit, dataTest)
        pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))

        # Save test predictions
        predCVLoop[foldSample == i] <- pred
    }
    errorRepCV[j,] <- predCVLoop != heart$outcome
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
mean(errorRepCV)
## [1] 0.39
boxplot(rowMeans(errorRepCV))
abline(h = 0.5, lty = 'dotted')

fullCV <- rowMeans(errorRepCV)

No Feature selection

## Repeated 5-fold cross validation with NO feature selection

set.seed(51773)
n <- nrow(heart) # The number of observations
nFolds <- 5
nRep <- 100

# Create a vector with the same amount of fold labels
fold <- rep(1:nFolds, 100) 
fold <- fold[1:n]

# Create matrix to store repeated CV error rates
errorRepCV <- matrix(NA, nRep, n)

for(j in 1:nRep){
# Reorder these to avoid systematic ordering bias
    foldSample <- sample(fold, n, replace = FALSE)

    predCVLoop <- factor(rep(NA, n), levels = c("no", "yes"))

    for(i in 1:nFolds){
        # Split into training and test set    
        dataTest <- heart[foldSample == i, ]
        dataTrain <- heart[foldSample != i, ]

        # Fit model on dataTrain
        
        fit <- glm(outcome~surgery+age+wait.time+mismatch+hla.a2+mscore, 
                       dataTrain,family = binomial)

        # Predict on dataTest
        pred <- predict(fit, dataTest)
        pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))

        # Save test predictions
        predCVLoop[foldSample == i] <- pred
    }
    errorRepCV[j,] <- predCVLoop != heart$outcome
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
mean(errorRepCV)
## [1] 0.3561905
boxplot(rowMeans(errorRepCV))
abline(h = 0.5, lty = 'dotted')

noFeatCV <- rowMeans(errorRepCV)

Feature selection outside of CV

## Repeated 5-fold cross validation with feature selection OUTSIDE of for-loop

set.seed(51773)
n <- nrow(heart) # The number of observations
nFolds <- 5
nRep <- 100

# Create a vector with the same amount of fold labels
fold <- rep(1:nFolds, 100) 
fold <- fold[1:n]

# Create matrix to store repeated CV error rates
errorRepCV <- matrix(NA, nRep, n)

for(j in 1:nRep){
# Reorder these to avoid systematic ordering bias
    foldSample <- sample(fold, n, replace = FALSE)

    predCVLoop <- factor(rep(NA, n), levels = c("no", "yes"))

    for(i in 1:nFolds){
        # Split into training and test set    
        dataTest <- heart[foldSample == i, ]
        dataTrain <- heart[foldSample != i, ]

        # Fit model on dataTrain
        
        fit <- glm(outcome~surgery+age+mscore, 
                       dataTrain,family = binomial)

        # Predict on dataTest
        pred <- predict(fit, dataTest)
        pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))

        # Save test predictions
        predCVLoop[foldSample == i] <- pred
    }
    errorRepCV[j,] <- predCVLoop != heart$outcome
}

mean(errorRepCV)
## [1] 0.3157143
boxplot(rowMeans(errorRepCV))
abline(h = 0.5, lty = 'dotted')

featOutsideCV <- rowMeans(errorRepCV)

Comparison of cross-validation procedures.

df <- data.frame(fullCV,noFeatCV,featOutsideCV)

df <- pivot_longer(df, cols = colnames(df), names_to = "Method", values_to = "Error")

ggplot(df, aes(x = Method, y = Error)) + geom_boxplot() + 
    theme_classic() + geom_hline(yintercept = 0.5, linetype = "dotted")