Source code for beexai.explanation.explaining

"""General explainer classes and subclassed explainers"""

import abc
import os
from typing import Callable, Optional, Union

import captum
import joblib
import numpy as np
import pandas as pd
import torch
from captum.attr import (DeepLift, FeatureAblation, InputXGradient,
                         IntegratedGradients, KernelShap, Lime, Saliency,
                         ShapleyValueSampling)


class GeneralExplainer:
    """Base class shared by all explainers.

    Subclasses are expected to provide ``init_explainer`` and ``explain``.
    Note that this class does not use ``abc.ABCMeta``, so the
    ``abc.abstractmethod`` markers below are documentation only: Python
    does not prevent instantiating this class or an incomplete subclass.

    Attributes:
        model (Callable): model to explain
        task (str): task to perform, "classification" or "regression"

    Methods:
        init_explainer: initialize explainer
        explain: compute attributions for a set of instances
        feature_order: get indexes of features sorted by importance
        compute_attributions: compute attributions, with optional
            on-disk caching

    Args:
        model (Callable): model to explain
        task (str): task to perform

    Raises:
        AssertionError: if ``task`` is not "classification" or
            "regression".
    """

    def __init__(self, model: Callable, task: str):
        assert task in [
            "classification",
            "regression",
        ], f"task must be in ['classification', 'regression'], found {task}"
        self.model = model
        self.task = task

    @abc.abstractmethod
    def init_explainer(self, *args, **kwargs) -> None:
        """Initialize explainer."""

    @abc.abstractmethod
    def explain(
        self,
        x_test: Union[pd.DataFrame, torch.Tensor, np.ndarray],
        *args,
        **kwargs,
    ) -> torch.Tensor:
        """Compute attributions for the given instances.

        Args:
            x_test (pd.DataFrame, torch.Tensor or np.ndarray): test set
            *args: additional arguments
            **kwargs: additional keyword arguments

        Returns:
            torch.Tensor: array of attributions (n_samples, n_features)
        """

    def feature_order(
        self, attributions: Union[torch.Tensor, np.ndarray]
    ) -> Union[torch.Tensor, np.ndarray]:
        """Get indexes of features sorted by decreasing attribution.

        Sorting is done along axis 1 on the raw (signed) values, so the
        most positive attribution comes first.

        Args:
            attributions (torch.Tensor or np.ndarray): array of
                attributions (n_samples, n_features)

        Returns:
            torch.Tensor or np.ndarray: indexes of features sorted by
            importance, in the same container type as the input.

        Raises:
            ValueError: if ``attributions`` is neither a torch.Tensor
                nor a np.ndarray.
        """
        if isinstance(attributions, np.ndarray):
            return np.argsort(-attributions, axis=1)
        if isinstance(attributions, torch.Tensor):
            return torch.argsort(-attributions, axis=1)
        raise ValueError("Attributions must be a torch.Tensor or a np.ndarray")

    def compute_attributions(
        self,
        x_in: pd.DataFrame,
        data_name: str,
        model_name: str,
        method_name: str,
        folder_path: str,
        preds: Optional[np.ndarray] = None,
        save: bool = False,
        use_abs: bool = False,
    ) -> torch.Tensor:
        """Compute attributions, reusing a cached copy from disk if present.

        The cache file is
        "folder_path/attributions/data_name/model_name_method_name.pkl".
        If that file exists it is loaded and returned as-is; otherwise the
        attributions are computed with ``self.explain`` and, when ``save``
        is True, written to that file.

        NOTE: the cache key only contains ``data_name``, ``model_name`` and
        ``method_name``. A cached file is therefore returned regardless of
        the values of ``x_in``, ``preds`` and ``use_abs``.

        Args:
            x_in (pd.DataFrame): input data
            data_name (str): name of the dataset
            model_name (str): name of the model
            method_name (str): name of the method
            folder_path (str): path of the folder where to save the
                attributions or retrieve existing attributions if
                previously saved.
            preds (np.ndarray, optional): predictions of the model,
                forwarded to ``explain`` as ``label``. Defaults to None.
            save (bool, optional): whether to save the attributions.
                Defaults to False.
            use_abs (bool, optional): whether to use the absolute value
                of the attributions. Defaults to False.

        Returns:
            torch.Tensor: tensor of attributions (n_samples, n_features)

        Raises:
            AssertionError: if the instance has no ``explainer`` attribute.
        """
        assert hasattr(self, "explainer"), "Explainer not initialized"

        att_folder = f"{folder_path}/attributions/{data_name}/"
        att_path = att_folder + f"{model_name}_{method_name}.pkl"

        if os.path.exists(att_path):
            # SECURITY: joblib.load unpickles the file; only point
            # folder_path at locations whose content is trusted.
            return joblib.load(att_path)

        attribution = self.explain(x_in, label=preds, absolute=use_abs)
        if save:
            # exist_ok avoids a crash if the folder is created by another
            # process between an existence check and the creation.
            os.makedirs(att_folder, exist_ok=True)
            joblib.dump(attribution, att_path)
        return attribution
class CaptumExplainer(GeneralExplainer):
    """Captum explainer class.

    Attributes:
        model (Callable): model to explain
        task (str): task to perform
        method (str): method to use
        sklearn (bool): whether to use a sklearn model
        explainer (captum.attr.Attribution): explainer, None until
            ``init_explainer`` has been called
        all_methods (dict): all methods available
        device (str): device to use

    Methods:
        init_explainer: initialize explainer
        explain: compute attributions for a set of instances

    Args:
        model (Callable): model to explain
        task (str): task to perform
        method (str): method to use. Must be one of the following:
            DeepLift, IntegratedGradients, Saliency, ShapleyValueSampling,
            KernelShap, InputXGradient, FeatureAblation, Lime
        sklearn (bool, optional): whether to use a sklearn model.
            Defaults to False.
        device (str, optional): device to use. Defaults to "cpu".

    Raises:
        AssertionError: if ``method`` is unknown, or if a gradient-based
            method is requested together with ``sklearn=True``.
    """

    def __init__(
        self,
        model: Callable,
        task: str,
        method: str,
        sklearn: bool = False,
        device: str = "cpu",
    ):
        super().__init__(model, task)
        self.method = method
        self.sklearn = sklearn
        self.explainer = None
        self.all_methods = {
            "DeepLift": DeepLift,
            "IntegratedGradients": IntegratedGradients,
            "Saliency": Saliency,
            "ShapleyValueSampling": ShapleyValueSampling,
            "KernelShap": KernelShap,
            "InputXGradient": InputXGradient,
            "FeatureAblation": FeatureAblation,
            "Lime": Lime,
        }
        assert method in self.all_methods, (
            f"Method {method} not available. Choose one of the following: "
            f"{list(self.all_methods)}"
        )
        # These methods are rejected for sklearn models; only the
        # perturbation-based methods listed in the message are allowed.
        assert sklearn is False or method not in [
            "DeepLift",
            "IntegratedGradients",
            "Saliency",
            "InputXGradient",
        ], (
            f"Method {method} not available for sklearn models. \n"
            "Choose one of the following: ['ShapleyValueSampling', "
            "'KernelShap', 'FeatureAblation', 'Lime']"
        )
        self.device = device

    def __forward_wrapper__(self, tensor: torch.Tensor) -> torch.Tensor:
        """Wrapper for sklearn model to convert the input in tensors.

        Args:
            tensor (torch.Tensor): batch of inputs supplied by Captum

        Returns:
            torch.Tensor: ``predict`` output for regression,
            ``predict_proba`` output for classification, placed on the
            same device as ``tensor``.
        """
        x_in = tensor.detach().cpu().numpy()
        with torch.no_grad():
            if self.task == "regression":
                output = self.model.predict(x_in)
            else:
                output = self.model.predict_proba(x_in)
        # np.asarray tolerates models returning array-likes; the result is
        # sent back to the input's device so it can be combined with it.
        return torch.from_numpy(np.asarray(output)).to(tensor.device)

    def init_explainer(self) -> captum.attr.Attribution:
        """Initialize Captum explainer.

        For sklearn models the attribution method is built around
        ``__forward_wrapper__``; otherwise around the model itself.

        Returns:
            captum.attr.Attribution: explainer
        """
        forward = self.__forward_wrapper__ if self.sklearn else self.model
        self.explainer = self.all_methods[self.method](forward)
        return self.explainer

    def explain(
        self,
        x_test: Union[pd.DataFrame, np.ndarray, torch.Tensor],
        label: Optional[Union[int, list, torch.Tensor, np.ndarray]] = None,
        absolute: bool = False,
    ) -> torch.Tensor:
        """Explain the whole set.

        The returned attributions are divided by the norm of the whole
        attribution tensor (one global norm, not one per sample). When
        that norm is exactly zero the attributions are returned
        unscaled (all zeros) instead of NaN.

        Args:
            x_test (pd.DataFrame, np.ndarray or torch.Tensor): test set
            label (int, list, np.ndarray, torch.Tensor, optional):
                label(s) of interest. Defaults to None, in which case
                ``self.model.predict`` is used for classification.
                A list of labels for each instance can be provided.
                Ignored for regression.
            absolute (bool, optional): whether to use the absolute value
                of the attributions. Defaults to False.

        Returns:
            torch.Tensor: array of attributions (n_samples, n_features)

        Raises:
            AssertionError: if ``init_explainer`` has not been called.
        """
        assert self.explainer is not None, "Explainer not initialized"

        if isinstance(x_test, pd.DataFrame):
            x_tensor_test = torch.tensor(x_test.values).float()
        elif isinstance(x_test, np.ndarray):
            x_tensor_test = torch.tensor(x_test).float()
        else:
            x_tensor_test = x_test

        if self.task == "regression":
            target = None
        elif label is None:
            target = self.model.predict(x_tensor_test)
        else:
            target = label

        if self.task == "classification" and not isinstance(target, int):
            if isinstance(target, torch.Tensor):
                # Keep the targets on the same device as the inputs.
                target = target.to(self.device)
            else:
                # Lists, arrays and NumPy integer scalars.
                target = torch.tensor(target, device=self.device)
            target = target.long()

        x_tensor_test = x_tensor_test.to(self.device)

        if self.method in ["Lime", "ShapleyValueSampling", "KernelShap"]:
            attributions = self.explainer.attribute(
                x_tensor_test, target=target, perturbations_per_eval=32
            )
        else:
            attributions = self.explainer.attribute(
                x_tensor_test, target=target
            )

        attributions = attributions.float()
        if absolute:
            attributions = torch.abs(attributions)

        norm = torch.norm(attributions)
        # `!= 0` (rather than `> 0`) keeps NaN propagation unchanged and
        # only skips the division that would turn all-zero into NaN.
        if norm != 0:
            attributions = attributions / norm
        return attributions