Source code for beexai.explanation.explaining

"""General explainer classes and subclassed explainers"""

import abc
import os
from typing import Callable, Optional, Union

import captum
import joblib
import numpy as np
import pandas as pd
import torch
from captum.attr import (DeepLift, FeatureAblation, InputXGradient,
                         IntegratedGradients, KernelShap, Lime, Saliency,
                         ShapleyValueSampling)



[docs]
class GeneralExplainer:
    """General explainer class.

    Base class holding the model and the task, plus helpers shared by the
    concrete explainers (feature ranking and attribution caching).

    Note:
        ``init_explainer`` and ``explain`` are decorated with
        ``abc.abstractmethod`` but this class does not use an ABC metaclass,
        so the decorators are documentary only: instantiating the class (or
        a subclass that does not override them) is not blocked.

    Attributes:
        model (Callable): model to explain
        task (str): task to perform

    Methods:
        init_explainer: initialize explainer
        explain: explain the given instances
        feature_order: get indexes of features sorted by importance
        compute_attributions: compute attributions, optionally cached on disk

    Args:
        model (Callable): model to explain
        task (str): task to perform, "classification" or "regression"

    Raises:
        AssertionError: if ``task`` is not one of the supported tasks.
    """

    def __init__(self, model: Callable, task: str):
        assert task in [
            "classification",
            "regression",
        ], f"task must be in ['classification', 'regression'], found {task}"
        self.model = model
        self.task = task

    @abc.abstractmethod
    def init_explainer(self, *args, **kwargs) -> None:
        """Initialize explainer."""

    @abc.abstractmethod
    def explain(
        self, x_test: Union[pd.DataFrame, torch.Tensor, np.ndarray], *args, **kwargs
    ) -> torch.Tensor:
        """Explain the given instances.

        Args:
            x_test (pd.DataFrame, torch.Tensor, np.ndarray): test set
            *args: additional arguments
            **kwargs: additional keyword arguments

        Returns:
            torch.Tensor: array of attributions (n_samples, n_features)
        """

    def feature_order(
        self, attributions: Union[torch.Tensor, np.ndarray]
    ) -> Union[torch.Tensor, np.ndarray]:
        """Get indexes of features sorted by importance.

        Features are ranked per row by decreasing attribution value.

        Args:
            attributions (torch.Tensor, np.ndarray): array of attributions
                (n_samples, n_features)

        Returns:
            torch.Tensor or np.ndarray: array of indexes of features sorted by
                importance, of the same container type as the input

        Raises:
            ValueError: if ``attributions`` is neither a tensor nor an array.
        """
        # Sorting the negated values in ascending order yields a descending
        # ranking of the original values.
        if isinstance(attributions, np.ndarray):
            return np.argsort(-(attributions), axis=1)
        if isinstance(attributions, torch.Tensor):
            return torch.argsort(-(attributions), dim=1)
        raise ValueError("Attributions must be a torch.Tensor or a np.ndarray")

    def compute_attributions(
        self,
        x_in: pd.DataFrame,
        data_name: str,
        model_name: str,
        method_name: str,
        folder_path: str,
        preds: Optional[np.ndarray] = None,
        save: bool = False,
        use_abs: bool = False,
    ) -> torch.Tensor:
        """Compute the attributions of a model, using an on-disk cache.

        The cache file is
        "folder_path/attributions/data_name/model_name_method_name.pkl".
        If that file already exists, the attributions are loaded from it and
        nothing is computed; otherwise they are computed with ``explain`` and,
        when ``save`` is True, written to that file.

        Note:
            The cache file name only depends on ``data_name``, ``model_name``
            and ``method_name``. An existing file is returned as is, whatever
            ``x_in``, ``preds`` or ``use_abs`` are passed.

        Args:
            x_in (pd.DataFrame): input data
            data_name (str): name of the dataset
            model_name (str): name of the model
            method_name (str): name of the method
            folder_path (str): path of the folder where to save the attributions
                or retrieve existing attributions if previously saved.
            preds (np.ndarray, optional): predictions of the model, forwarded
                to ``explain`` as ``label``. Defaults to None.
            save (bool, optional): whether to save the attributions. Defaults to False.
            use_abs (bool, optional): whether to use the absolute value of the attributions.
                Defaults to False.

        Returns:
            torch.Tensor: tensor of attributions (n_samples, n_features)

        Raises:
            AssertionError: if the instance has no ``explainer`` attribute.
        """
        assert hasattr(self, "explainer"), "Explainer not initialized"
        assert hasattr(self, "explain"), "Explainer must have an explain method"
        att_folder = f"{folder_path}/attributions/{data_name}/"
        att_path = att_folder + f"{model_name}_{method_name}.pkl"
        if os.path.exists(att_path):
            # joblib.load unpickles the file: only point folder_path at
            # locations whose content is trusted.
            return joblib.load(att_path)
        attribution = self.explain(x_in, label=preds, absolute=use_abs)
        if save:
            # exist_ok avoids the check-then-create race when several runs
            # write to the same attribution folder concurrently.
            os.makedirs(att_folder, exist_ok=True)
            joblib.dump(attribution, att_path)
        return attribution





[docs]
class CaptumExplainer(GeneralExplainer):
    """Captum explainer class.

    Attributes:
        model (Callable): model to explain
        task (str): task to perform
        method (str): method to use
        sklearn (bool): whether to use a sklearn model
        explainer (captum.attr.Attribution): explainer, None until
            ``init_explainer`` is called
        all_methods (dict): all methods available
        device (str): device to use

    Methods:
        init_explainer: initialize explainer
        explain: explain the whole set

    Args:
        model (Callable): model to explain
        task (str): task to perform
        method (str): method to use. Must be one of the following:
            DeepLift, IntegratedGradients, Saliency, ShapleyValueSampling,
            KernelShap, InputXGradient, FeatureAblation, Lime
        sklearn (bool, optional): whether to use a sklearn model.
            Defaults to False.
        device (str, optional): device to use. Defaults to "cpu".

    Raises:
        AssertionError: if ``task`` or ``method`` is not supported, or if a
            gradient-based method is requested for a sklearn model.
    """

    def __init__(
        self,
        model: Callable,
        task: str,
        method: str,
        sklearn: bool = False,
        device: str = "cpu",
    ):
        super().__init__(model, task)
        self.method = method
        self.sklearn = sklearn
        self.explainer = None
        self.all_methods = {
            "DeepLift": DeepLift,
            "IntegratedGradients": IntegratedGradients,
            "Saliency": Saliency,
            "ShapleyValueSampling": ShapleyValueSampling,
            "KernelShap": KernelShap,
            "InputXGradient": InputXGradient,
            "FeatureAblation": FeatureAblation,
            "Lime": Lime,
        }
        assert method in self.all_methods, (
            f"Method {method} not available. Choose one of the following: "
            f"{list(self.all_methods)}"
        )
        # Gradient-based methods cannot be used with the sklearn wrapper.
        assert sklearn is False or method not in [
            "DeepLift",
            "IntegratedGradients",
            "Saliency",
            "InputXGradient",
        ], f"""Method {method} not available for sklearn models.
            Choose one of the following: ['ShapleyValueSampling', 'KernelShap',
            'FeatureAblation', 'Lime']"""
        self.device = device

    def __forward_wrapper__(self, tensor: torch.Tensor) -> torch.Tensor:
        """Forward function wrapping a sklearn model for Captum.

        Converts the input tensor to a numpy array, calls ``predict``
        (regression) or ``predict_proba`` (classification) on the model and
        converts the result back to a tensor.

        Args:
            tensor (torch.Tensor): input batch

        Returns:
            torch.Tensor: model output, on the same device as ``tensor``

        Raises:
            ValueError: if ``self.task`` is not a supported task.
        """
        x_in = tensor.detach().cpu().numpy()
        if self.task == "regression":
            with torch.no_grad():
                output = self.model.predict(x_in)
        elif self.task == "classification":
            with torch.no_grad():
                output = self.model.predict_proba(x_in)
        else:
            raise ValueError(
                f"task must be in ['classification', 'regression'], found {self.task}"
            )
        # Return the output on the input's device so that it can be combined
        # with tensors derived from the inputs when device is not "cpu".
        return torch.from_numpy(output).to(tensor.device)

    def init_explainer(self) -> captum.attr.Attribution:
        """Initialize Captum explainer.

        For sklearn models the explainer is built on the forward wrapper,
        otherwise directly on the model.

        Returns:
            captum.attr.Attribution: explainer
        """
        if self.sklearn:
            explainer = self.all_methods[self.method](self.__forward_wrapper__)
        else:
            explainer = self.all_methods[self.method](self.model)
        self.explainer = explainer
        return explainer

    def explain(
        self,
        x_test: pd.DataFrame,
        label: Optional[Union[int, list, torch.Tensor, np.ndarray]] = None,
        absolute: bool = False,
        perturbations_per_eval: int = 32,
    ) -> torch.Tensor:
        """Explain the whole set.

        The returned attributions are divided by their overall norm (computed
        over the whole returned tensor, not per sample). When every
        attribution is exactly zero they are returned unchanged instead of
        being turned into NaN by a division by zero.

        Args:
            x_test (pd.DataFrame): test set. A np.ndarray or a torch.Tensor
                is also accepted.
            label (int, list, np.ndarray, torch.Tensor, optional): label(s) of interest.
                Defaults to None, in which case the model predictions are used
                for classification. A list of labels for each instance can be
                provided. Ignored for regression.
            absolute (bool, optional): whether to use the absolute value of the attributions.
                Defaults to False.
            perturbations_per_eval (int, optional): value forwarded to
                ``attribute`` for the Lime, ShapleyValueSampling and
                KernelShap methods. Defaults to 32.

        Returns:
            torch.Tensor: array of attributions (n_samples, n_features)

        Raises:
            AssertionError: if ``init_explainer`` has not been called.
        """
        assert self.explainer is not None, "Explainer not initialized"
        x_tensor_test = x_test
        if isinstance(x_test, pd.DataFrame):
            x_tensor_test = torch.tensor(x_test.values).float()
        elif isinstance(x_test, np.ndarray):
            x_tensor_test = torch.tensor(x_test).float()

        if self.task == "regression":
            target = None
        elif self.task == "classification" and label is None:
            target = self.model.predict(x_tensor_test)
        else:
            target = label

        if self.task == "classification" and not isinstance(target, int):
            if isinstance(target, np.integer):
                # NumPy integer scalars are not ``int`` instances and have no
                # ``.long()``; treat them as a plain integer label.
                target = int(target)
            else:
                if isinstance(target, (np.ndarray, list)):
                    target = torch.tensor(target, device=self.device)
                # Tensor targets are moved as well, so that every kind of
                # target ends up on the same device as the inputs.
                target = target.long().to(self.device)

        x_tensor_test = x_tensor_test.to(self.device)
        if self.method in ["Lime", "ShapleyValueSampling", "KernelShap"]:
            attributions = self.explainer.attribute(
                x_tensor_test,
                target=target,
                perturbations_per_eval=perturbations_per_eval,
            )
        else:
            attributions = self.explainer.attribute(x_tensor_test, target=target)
        attributions = attributions.float()
        if absolute:
            attributions = torch.abs(attributions)
        norm = torch.norm(attributions)
        if norm == 0:
            # All attributions are zero: dividing would produce NaN.
            return attributions
        return attributions / norm