Model evaluation

Predicting heart transplant success

Heart transplant

Transplant rejection of patients that were on the waiting list for the Stanford heart transplant program.

Score developed to assign hearts for transplant.
Is the score effective?

We have patients that reject heart within one year and survive without a rejection after one year.

J Crowley and M Hu (1977), Covariance analysis of heart transplant survival data. Journal of the American Statistical Association, 72, 27-36.

Explore

## Warning: package 'readr' was built under R version 4.1.3

heart = read.csv("https://wimr-genomics.vip.sydney.edu.au/AMED3002/data/heart.csv")
heart$outcome <- rep(NA,nrow(heart))
heart$outcome[heart$futime/365>=1 & heart$reject==0] <- 'no'
heart$outcome[heart$futime/365<1 & heart$reject==1] <- 'yes'
heart$outcome <- factor(heart$outcome)

heart = na.omit(heart)

table(heart$outcome)

## 
##  no yes 
##  19  23

boxplot(mscore~outcome,heart, ylab = 'Match score',xlab = 'Rejection')

Model selection

fit0 <- glm(outcome~1,heart,family = binomial)
fitFull <- glm(outcome~surgery+age+wait.time+mismatch+hla.a2+mscore,heart,family = binomial)

fit = step(glm(fit0,heart,family = binomial),
           scope = list(lower=fit0, upper=fitFull),
           direction = "forward", trace  = 0)

summary(fit)

## 
## Call:
## glm(formula = outcome ~ age + surgery + mscore, family = binomial, 
##     data = heart)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1036  -0.7670   0.3920   0.8025   1.8747  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -7.37758    3.04074  -2.426   0.0153 *
## age          0.13604    0.06027   2.257   0.0240 *
## surgery     -1.56404    0.96117  -1.627   0.1037  
## mscore       1.17452    0.78748   1.491   0.1358  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 57.843  on 41  degrees of freedom
## Residual deviance: 42.788  on 38  degrees of freedom
## AIC: 50.788
## 
## Number of Fisher Scoring iterations: 4

Resubstitution error

pred <- predict(fit, heart)
pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))
predSub <- pred
mean(predSub!=heart$outcome)

## [1] 0.2380952

5-fold cross validation

Create a fold vector.
Create somewhere to store predictions.
For each fold…
1. Extract that fold as a test set, the remainder as a training set.
2. Fit classifier on the training set.
3. Predict class of test set with classifier.
4. Add this prediction to the storage vector.
Calculate error/accuracy by comparing predictions to true class.

5-fold cross validation

## 5-fold cross validation with feature selection inside for-loop

set.seed(51773)
n <- nrow(heart) # The number of observations
nFolds <- 5


# Create a vector with the same amount of fold labels
fold <- rep(1:nFolds, 100) 
fold <- fold[1:n]


# Reorder these to avoid systematic ordering bias
foldSample <- sample(fold, n, replace = FALSE)


# Create something to store errors.
predCV <- factor(rep(NA, n), levels = c("no", "yes"))


for(i in 1:nFolds){
# Split into training and test set    
dataTest <- heart[foldSample == i, ]
dataTrain <- heart[foldSample != i, ]

# Fit model on dataTrain
fit0 <- glm(outcome~1, dataTrain, family = binomial)
fitFull <- glm(outcome~surgery+age+wait.time+mismatch+hla.a2+mscore, dataTrain,family = binomial)

fit = step(glm(fit0, dataTrain, family = binomial),
           scope = list(lower=fit0, upper=fitFull),
           direction = "forward", trace  = 0)

# Predict on dataTest
pred <- predict(fit, dataTest)
pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))

# Save test predictions
predCV[foldSample == i] <- pred
}

mean(predCV != heart$outcome)

## [1] 0.3571429

Repeated 5-fold cross validation

## Repeated 5-fold cross validation with feature selection inside for-loop

set.seed(51773)
n <- nrow(heart) # The number of observations
nFolds <- 5
nRep <- 100

# Create a vector with the same amount of fold labels
fold <- rep(1:nFolds, 100) 
fold <- fold[1:n]

# Create matrix to store repeated CV error rates
errorRepCV <- matrix(NA, nRep, n)

for(j in 1:nRep){
# Reorder these to avoid systematic ordering bias
    foldSample <- sample(fold, n, replace = FALSE)

    predCVLoop <- factor(rep(NA, n), levels = c("no", "yes"))

    for(i in 1:nFolds){
        # Split into training and test set    
        dataTest <- heart[foldSample == i, ]
        dataTrain <- heart[foldSample != i, ]

        # Fit model on dataTrain
        fit0 <- glm(outcome~1, dataTrain, family = binomial)
        fitFull <- glm(outcome~surgery+age+wait.time+mismatch+hla.a2+mscore, 
                       dataTrain,family = binomial)

        fit = step(glm(fit0, dataTrain, family = binomial),
                    scope = list(lower=fit0, upper=fitFull),
                    direction = "forward", trace  = 0)

        # Predict on dataTest
        pred <- predict(fit, dataTest)
        pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))

        # Save test predictions
        predCVLoop[foldSample == i] <- pred
    }
    errorRepCV[j,] <- predCVLoop != heart$outcome
}

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

mean(errorRepCV)

## [1] 0.39

boxplot(rowMeans(errorRepCV))
abline(h = 0.5, lty = 'dotted')

fullCV <- rowMeans(errorRepCV)

No Feature selection

## Repeated 5-fold cross validation with NO feature selection

set.seed(51773)
n <- nrow(heart) # The number of observations
nFolds <- 5
nRep <- 100

# Create a vector with the same amount of fold labels
fold <- rep(1:nFolds, 100) 
fold <- fold[1:n]

# Create matrix to store repeated CV error rates
errorRepCV <- matrix(NA, nRep, n)

for(j in 1:nRep){
# Reorder these to avoid systematic ordering bias
    foldSample <- sample(fold, n, replace = FALSE)

    predCVLoop <- factor(rep(NA, n), levels = c("no", "yes"))

    for(i in 1:nFolds){
        # Split into training and test set    
        dataTest <- heart[foldSample == i, ]
        dataTrain <- heart[foldSample != i, ]

        # Fit model on dataTrain
        
        fit <- glm(outcome~surgery+age+wait.time+mismatch+hla.a2+mscore, 
                       dataTrain,family = binomial)

        # Predict on dataTest
        pred <- predict(fit, dataTest)
        pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))

        # Save test predictions
        predCVLoop[foldSample == i] <- pred
    }
    errorRepCV[j,] <- predCVLoop != heart$outcome
}

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

mean(errorRepCV)

## [1] 0.3561905

boxplot(rowMeans(errorRepCV))
abline(h = 0.5, lty = 'dotted')

noFeatCV <- rowMeans(errorRepCV)

Feature selection outside of CV

## Repeated 5-fold cross validation with feature selection OUTSIDE of for-loop

set.seed(51773)
n <- nrow(heart) # The number of observations
nFolds <- 5
nRep <- 100

# Create a vector with the same amount of fold labels
fold <- rep(1:nFolds, 100) 
fold <- fold[1:n]

# Create matrix to store repeated CV error rates
errorRepCV <- matrix(NA, nRep, n)

for(j in 1:nRep){
# Reorder these to avoid systematic ordering bias
    foldSample <- sample(fold, n, replace = FALSE)

    predCVLoop <- factor(rep(NA, n), levels = c("no", "yes"))

    for(i in 1:nFolds){
        # Split into training and test set    
        dataTest <- heart[foldSample == i, ]
        dataTrain <- heart[foldSample != i, ]

        # Fit model on dataTrain
        
        fit <- glm(outcome~surgery+age+mscore, 
                       dataTrain,family = binomial)

        # Predict on dataTest
        pred <- predict(fit, dataTest)
        pred <- factor(pred>0.5, 
                     levels = c(FALSE, TRUE),
                     labels = c("no", "yes"))

        # Save test predictions
        predCVLoop[foldSample == i] <- pred
    }
    errorRepCV[j,] <- predCVLoop != heart$outcome
}

mean(errorRepCV)

## [1] 0.3157143

boxplot(rowMeans(errorRepCV))
abline(h = 0.5, lty = 'dotted')

featOutsideCV <- rowMeans(errorRepCV)

Comparison of cross-validation procedures.

df <- data.frame(fullCV,noFeatCV,featOutsideCV)

df <- pivot_longer(df, cols = colnames(df), names_to = "Method", values_to = "Error")

ggplot(df, aes(x = Method, y = Error)) + geom_boxplot() + 
    theme_classic() + geom_hline(yintercept = 0.5, linetype = "dotted")