• 1 Ch 18, Explaining Models/Predictions
    • 1.1 Techniques
    • 1.2 Local Explanations
    • 1.3 Global Explanations
    • 1.4 Partial Dependence
if (dir.exists("static/lectures")) {
  model_fit <- readRDS("static/lectures/model_fit.RDS")
  modeldata <- readRDS("static/lectures/modeldata.RDS")
} else {
  # fall back to downloading the files from the course repository
  base_str <- "https://github.com/erhla/pa470spring2022/raw/main/static/lectures/"
  model_fit <- readRDS(url(paste0(base_str, "model_fit.RDS")))
  modeldata <- readRDS(url(paste0(base_str, "modeldata.RDS")))
}


modeldata <- modeldata %>% 
  select(sp_log, parcel_num, Neighborhood, ASSESSEDVALUE,
    total_squa, heightcat, extcat, has_garage,
    has_basement, bathcat, total_floo, pct_over_50,
    foreclosures_per_prop,
    year_built, GEOID, sale_date) %>% 
  mutate(across(where(is.character), as.factor))

1 Ch 18, Explaining Models/Predictions

Why do our models make the predictions they do? Model explanations can shed some light on this. This lecture uses the Detroit data from the old version of the assessment assignment.

1.1 Techniques

There are two broad approaches (a model-based vip sketch follows this list):

  • vip functions, when we want to use model-based methods that take advantage of model structure (and are often faster), and
  • DALEX functions, when we want to use model-agnostic methods that can be applied to any model.
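
This lecture takes the model-agnostic route. For comparison, here is a minimal model-based sketch with vip; it assumes the engine inside model_fit (e.g., a tree ensemble) reports its own importance scores, and it will error for engines that do not.

library(vip)

# pull the parsnip fit out of the fitted workflow and ask the
# engine for its model-based importance scores
model_fit %>%
  extract_fit_parsnip() %>%
  vip(num_features = 10)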

Let’s look at building a model-agnostic explainer for our model.

library(DALEXtra)
## Loading required package: DALEX
## Welcome to DALEX (version: 2.4.3).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
## 
## Attaching package: 'DALEX'
## The following object is masked from 'package:dplyr':
## 
##     explain
sales <- modeldata %>% filter(!is.na(sp_log), year_built != 0)

explain_reg <- explain_tidymodels(
  model_fit,
  data = sales,
  y = sales$sp_log,
  verbose=TRUE
)
## Preparation of a new explainer is initiated
##   -> model label       :  workflow  (  default  )
##   -> data              :  4336  rows  16  cols 
##   -> data              :  tibble converted into a data.frame 
##   -> target variable   :  4336  values 
##   -> predict function  :  yhat.workflow  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package tidymodels , ver. 1.1.0 , task regression (  default  ) 
##   -> predicted values  :  numerical, min =  3.836263 , mean =  4.510255 , max =  5.634867  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -1.258597 , mean =  -0.1755897 , max =  0.6319306  
##   A new explainer has been created!
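
One convenience of the explainer is a uniform predict interface: the same call works regardless of the underlying model type. A quick sanity check (output not shown):

# DALEX provides a predict method for explainer objects
head(predict(explain_reg, sales))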

1.2 Local Explanations

A local explanation will provide information about a prediction for a single observation.

targ <- modeldata %>% filter(parcel_num == '22027164.')

targ %>% t() %>%
  kableExtra::kable() %>%
  kableExtra::kable_material(c("striped", "hover"))
sp_log                  4.845098
parcel_num              22027164.
Neighborhood            0033A
ASSESSEDVALUE           19900
total_squa              4792
heightcat               2
extcat                  3
has_garage              1
has_basement            1
bathcat                 1
total_floo              1462
pct_over_50             0.6666667
foreclosures_per_prop   0.1332053
year_built              1941
GEOID                   26163539400
sale_date               2019-01-01
# the default type is "break_down": sequential feature contributions
preds <- predict_parts(explainer = explain_reg,
                       new_observation = targ)

preds
## variable                                  contribution variable_name
## intercept                                    4.51025487 intercept
## ASSESSEDVALUE = 19900                        0.11285294 ASSESSEDVALUE
## total_floo = 1462                            0.02634718 total_floo
## foreclosures_per_prop = 0.133205251360871    0.03825646 foreclosures_per_prop
## year_built = 1941                            0.01722600 year_built
## total_squa = 4792                            0.01508530 total_squa
## pct_over_50 = 0.666666666666667             -0.01248178 pct_over_50
## sp_log = 4.84509804001426                    0.00000000 sp_log
## parcel_num = 22027164.                       0.00000000 parcel_num
## Neighborhood = 0033A                         0.00000000 Neighborhood
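
These break-down objects also have a plot method, which draws the contributions as a waterfall-style chart:

plot(preds)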

Break-down explanations like this depend on the order in which features are considered, and examining every ordering is computationally expensive. Best practice is to use Shapley Additive Explanations (SHAP), “where the average contributions of features are computed under different combinations or ‘coalitions’ of feature orderings.”

preds_shap <- 
  predict_parts(
    explainer = explain_reg, 
    new_observation = targ, 
    type = "shap",
    B = 20
  )

plot(preds_shap)
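
The default plot shows the distribution of each feature's contribution across the B orderings. For more control we can plot the underlying data frame ourselves; this sketch follows the textbook's approach, drawing a bar at each feature's mean contribution with boxplots showing the spread across orderings:

library(forcats)

preds_shap %>%
  group_by(variable) %>%
  mutate(mean_val = mean(contribution)) %>%
  ungroup() %>%
  # order features by the size of their average contribution
  mutate(variable = fct_reorder(variable, abs(mean_val))) %>%
  ggplot(aes(contribution, variable, fill = mean_val > 0)) +
  geom_col(data = ~distinct(., variable, mean_val),
           aes(mean_val, variable),
           alpha = 0.5) +
  geom_boxplot(width = 0.5) +
  theme(legend.position = "none") +
  scale_fill_viridis_d() +
  labs(y = NULL)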

Let’s look at another property.

targ2 <- modeldata %>% filter(parcel_num == '08009807.')

predict_parts(
  explainer = explain_reg,
  new_observation = targ2,
  type = "shap",
  B = 20
) %>% plot()

The explanation is local, so each data point has unique contributions.

1.3 Global Explanations

Global explanations are also called variable importance; they help us understand which features are most important in driving predictions, aggregated over all the data.

One way to compute variable importance is to permute each feature in turn and measure how much worse the model's predictions are on the shuffled data than on the original. No refitting is involved: if re-predicting with a scrambled column degrades performance substantially, that feature matters to the model.

# shuffles each feature several times (B = 10 by default) and records
# the increase in RMSE when that feature is scrambled
vip_model <- model_parts(explain_reg,
                         loss_function = loss_root_mean_square)

plot(vip_model)

The plotting helper below mirrors Figure 18.4 from the textbook.

ggplot_imp <- function(...) {
  obj <- list(...)
  # loss name recorded by DALEX, used to label the x-axis
  metric_name <- attr(obj[[1]], "loss_name")
  metric_lab <- paste(metric_name, 
                      "after permutations\n(higher indicates more important)")
  
  # drop the fully-permuted baseline rows
  full_vip <- bind_rows(obj) %>%
    filter(variable != "_baseline_")
  
  # loss of the intact model, drawn as a dashed reference line
  perm_vals <- full_vip %>% 
    filter(variable == "_full_model_") %>% 
    group_by(label) %>% 
    summarise(dropout_loss = mean(dropout_loss))
  
  p <- full_vip %>%
    filter(variable != "_full_model_") %>% 
    mutate(variable = fct_reorder(variable, dropout_loss)) %>%
    ggplot(aes(dropout_loss, variable)) 
  # facet when importances from more than one model are passed in
  if (length(obj) > 1) {
    p <- p + 
      facet_wrap(vars(label)) +
      geom_vline(data = perm_vals, aes(xintercept = dropout_loss, color = label),
                 size = 1.4, lty = 2, alpha = 0.7) +
      geom_boxplot(aes(color = label, fill = label), alpha = 0.2)
  } else {
    p <- p + 
      geom_vline(data = perm_vals, aes(xintercept = dropout_loss),
                 size = 1.4, lty = 2, alpha = 0.7) +
      geom_boxplot(fill = "#91CBD765", alpha = 0.4)
  }
  p +
    theme(legend.position = "none") +
    labs(x = metric_lab, 
         y = NULL, fill = NULL, color = NULL)
}

ggplot_imp(vip_model)
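
Because ggplot_imp accepts any number of model_parts objects, importances from different models can be compared side by side in facets. A sketch, assuming a second explainer explain_other (hypothetical, built the same way as explain_reg) exists:

# hypothetical second model for comparison
vip_other <- model_parts(explain_other,
                         loss_function = loss_root_mean_square)

ggplot_imp(vip_model, vip_other)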

1.4 Partial Dependence

We can also build global model explanations by aggregating many local explanations into partial dependence profiles, which trace how the average prediction changes as a single feature varies (imagine the derivative of the prediction with respect to that feature).

ggplot_pdp <- function(obj, x) {
  
  p <- 
    as_tibble(obj$agr_profiles) %>%
    # strip the "workflow_" prefix DALEX prepends to the label
    mutate(`_label_` = stringr::str_remove(`_label_`, "^[^_]*_")) %>%
    ggplot(aes(`_x_`, `_yhat_`)) +
    # individual (per-observation) profiles as faint gray lines
    geom_line(data = as_tibble(obj$cp_profiles),
              aes(x = {{ x }}, group = `_ids_`),
              size = 0.5, alpha = 0.05, color = "gray50")
  
  num_colors <- n_distinct(obj$agr_profiles$`_label_`)
  
  # color the aggregate profile by label when there is more than one
  if (num_colors > 1) {
    p <- p + geom_line(aes(color = `_label_`), size = 1.2, alpha = 0.8)
  } else {
    p <- p + geom_line(color = "midnightblue", size = 1.2, alpha = 0.8)
  }
  
  p
}

pdp_age <- model_profile(explain_reg,
                         N = 500,  # sample 500 observations for speed
                         variables = "year_built")

ggplot_pdp(pdp_age, year_built)  +
  labs(x = "Year built", 
       y = "Sale Price (log)", 
       color = NULL)

pdp_floor <- model_profile(explain_reg,
                           N = 500,
                           variables = "total_floo")

ggplot_pdp(pdp_floor, total_floo) +
  scale_x_log10() +
  labs(x = "Total Floor Area",
       y = "Sale Price (log)",
       color = NULL)
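
model_profile can also compute profiles within groups, mirroring the grouped profiles in the textbook. A sketch using has_garage as the grouping variable (if has_garage is stored as numeric 0/1, convert it with as.factor() first):

pdp_age_garage <- model_profile(explain_reg,
                                N = 500,
                                variables = "year_built",
                                groups = "has_garage")

ggplot_pdp(pdp_age_garage, year_built) +
  labs(x = "Year built",
       y = "Sale Price (log)",
       color = NULL)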