Source code for beexai.evaluate.metrics.sensitivity

from typing import Callable, Optional, Union

import numpy
import torch

from beexai.evaluate.metrics.metrics import CustomMetric
from beexai.explanation.explaining import GeneralExplainer
from beexai.utils.time_seed import time_function


[docs] class Sensitivity(CustomMetric): """Implementation of the sensitivity metric. Computes the sensitivity of the model by adding significant noise to the input and compute the difference in attributions between the original input and the input with a small perturbation. References: - `On the (In)fidelity and Sensitivity for Explanations <https://arxiv.org/abs/1901.09392>` Attributes: model (callable): model to explain task (str): task to perform device (str): device to use explainer (object): explainer to use radius (float): radius of the uniform distribution to generate the noise Methods: get_sens: computes the sensitivity of the model """ def __init__( self, model: Callable, task: str, device: str, explainer: GeneralExplainer, radius=0.5, ): super().__init__(model, task, device) self.explainer = explainer self.radius = radius def __get_noises__(self, x_in: torch.Tensor, k: int = 5): """Generate k noises from a uniform distribution with mean 0 and radius self.radius.""" n_shape = (k, x_in.shape[0], x_in.shape[1]) noises = torch.rand(n_shape, device=self.device) radius = torch.tensor(self.radius, device=self.device) if radius.ndim == 0: radius = radius.repeat(x_in.shape[1]) noises = 2 * radius[None, None, :] * noises - radius[None, None, :] return noises
[docs] def get_sens( self, x_in: torch.Tensor, label: Optional[Union[int, list, torch.Tensor]] = None, attributions: Optional[torch.Tensor] = None, ) -> float: """Computes the sensitivity of the model. Args: x_in (torch.Tensor): input to compute the sensitivity score label (int, list, np.ndarray, torch.Tensor, optional): label(s) of interest. Defaults to None. A list of labels for each instance can be provided. attributions (torch.Tensor, optional): attributions for each instance. Defaults to None. If None, the attributions are computed using the explainer. Returns: float: sensitivity score """ self.check_shape(x_in, x_in) noises = self.__get_noises__(x_in) _, target = self.select_output(x_in, label=label) if isinstance(target, (numpy.ndarray, list)): target = torch.tensor(target, device=self.device) if attributions is None: attributions = self.explainer.explain(x_in, label=target) self.check_shape(x_in, attributions) sensitivities = torch.zeros((len(noises), x_in.shape[0]), device=self.device) for _, j in enumerate(range(len(noises))): noise = noises[j] pert_in = x_in - noise pert_att = self.explainer.explain(pert_in, label=target) sensitivity = torch.norm(attributions - pert_att, dim=1) rho = torch.norm(noise.flatten()) sensitivities[j] = sensitivity / rho all_sens = torch.max(sensitivities, dim=0).values return 100 * torch.mean(all_sens, axis=0).item()
[docs] @time_function def compute_sens( model: Callable, rand_model: Callable, task: str, x_test: torch.Tensor, label: Union[int, list, torch.Tensor], metrics: dict, exp: GeneralExplainer, randmodel_exp: GeneralExplainer, device: str = "cpu", use_rand: bool = True, attributions=None, rand_attributions=None, randmodel_attributions=None, radius=0.5, ) -> dict: """Computes the sensitivity score of the model. Args: model (callable): base model rand_model (callable): reference model (random model) task (str): task to perform x_test (torch.Tensor): test data label (int, list, np.ndarray, torch.Tensor): label(s) of interest metrics (dict): dictionary of metrics exp (GeneralExplainer): base explainer randmodel_exp (GeneralExplainer): explainer for the random model device (str, optional): device to use. Defaults to "cpu". use_rand (bool, optional): whether to use the random explainer. Defaults to True. attributions (torch.Tensor, optional): attributions for each instance. Defaults to None. rand_attributions (torch.Tensor, optional): attributions for each instance for the random explainer. Defaults to None. randmodel_attributions (torch.Tensor, optional): attributions for each instance for the random model. Defaults to None. radius (float, optional): radius of the uniform distribution to generate the noise. Defaults to 0.5. Returns: dict: dict of metrics """ sens = Sensitivity(model, task, device, exp, radius=radius) sens_score = sens.get_sens(x_test, label, attributions) metrics["Sensitivity"]["original"] = sens_score if use_rand: rand_sens_score = sens.get_sens(x_test, label, rand_attributions) metrics["Sensitivity"]["random"] = rand_sens_score if rand_model is not None: randmodel_sens = Sensitivity( rand_model, task, device, randmodel_exp, radius=radius ) randmodel_sens_score = randmodel_sens.get_sens( x_test, label, randmodel_attributions ) metrics["Sensitivity"]["random_model"] = randmodel_sens_score return metrics