""" Module for evaluation of feature importance explanations. """
import warnings
import numpy as np
import sklearn.metrics as met
from scipy.spatial.distance import cdist
from teex._utils._arrays import _binarize_arrays, _check_correct_array_values
from teex._utils._errors import MetricNotAvailableError
from teex.featureImportance.data import scale_fi_bounds
# Metric names accepted by feature_importance_scores; any name outside this set makes it raise
# MetricNotAvailableError.
_AVAILABLE_FEATURE_IMPORTANCE_METRICS = {'fscore', 'prec', 'rec', 'cs', 'auc'}
def _individual_fi_metrics(gt, pred, binGt, binPred, metric, predsNegative, thresholdType):
    """ Classification and real vector metrics for a single observation.

    If ``metric='auc'`` and ``predsNegative`` is truthy, a transformed copy of ``pred`` is scored instead of ``pred``
    itself: its absolute value when ``thresholdType == 'abs'``, otherwise ``pred`` with its negative entries set to 0.
    The array passed by the caller is not modified.

    :param gt: (ndarray) of shape (nFeatures,). Ground truth (real or binary) vector.
    :param pred: (ndarray) of shape (nFeatures,). Predicted (real or binary) vector.
    :param binGt: (ndarray) of shape (nFeatures,). Ground truth (binary) vector.
    :param binPred: (ndarray) of shape (nFeatures,). Predicted (binary) vector.
    :param metric: (str) in ['fscore', 'prec', 'rec', 'cs', 'auc'] metric to compute.
    :param predsNegative: (bool) whether 'pred' contains negative values or not.
    :param thresholdType: (str) in ['abs', 'thres'].
    :return: (float) selected metric. ``None`` if ``metric`` is not one of the names listed above. """
    if metric == 'fscore':
        return met.fbeta_score(binGt, binPred, beta=1)
    elif metric == 'prec':
        return met.precision_score(binGt, binPred)
    elif metric == 'rec':
        return met.recall_score(binGt, binPred)
    elif metric == 'cs':
        return cosine_similarity(gt, pred)
    elif metric == 'auc':
        # Truthiness rather than ``is True``: a ``numpy.bool_`` flag is never identical to the ``True`` singleton,
        # so the identity test would silently skip the handling of negative scores.
        if predsNegative:
            pred = np.abs(pred) if thresholdType == 'abs' else np.where(pred < 0, 0, pred)
        return met.roc_auc_score(binGt, pred)
    # Unknown metric names are filtered out by the public entry point; keep the implicit ``None`` explicit.
    return None
def _compute_feature_importance_scores(binaryGts, binaryPreds, gts, preds, classScores, realScores, metrics,
                                       predsNegative, thresholdType, verbose):
    """ Computes every requested metric for every observation, patching degenerate vectors first.

    The four flags returned by ``_check_correct_array_values`` decide, per observation, whether a vector must be
    perturbed at one randomly drawn index (fixed seed, so runs are reproducible) before the metrics are computed:

        - if ``classScores`` is set, a flagged binary ground truth / prediction gets that entry flipped.
        - if ``realScores`` is set, a flagged real ground truth / prediction gets 1e-4 added to that entry.

    The perturbation is applied to copies of the affected rows, so the arrays passed by the caller are left untouched.

    :param binaryGts: iterable of binary ground truth vectors, one per observation.
    :param binaryPreds: iterable of binary predicted vectors, one per observation.
    :param gts: iterable of real valued ground truth vectors, one per observation.
    :param preds: iterable of real valued predicted vectors, one per observation.
    :param classScores: (bool) whether the binary vectors may need to be patched.
    :param realScores: (bool) whether the real valued vectors may need to be patched.
    :param metrics: iterable of metric names, forwarded one by one to :func:`_individual_fi_metrics`.
    :param predsNegative: forwarded to :func:`_individual_fi_metrics`.
    :param thresholdType: forwarded to :func:`_individual_fi_metrics`.
    :param verbose: (bool) if truthy, one warning is issued for each kind of patch that was applied at least once.
    :return: (ndarray of dtype float32) one row per observation and one column per metric, in the given order. """
    ret = []
    rng = np.random.default_rng(888)
    someUnifBGt, someUnifBPred, someEmptyGt, someEmptyPred = False, False, False, False

    for binGt, binPred, gt, pred in zip(binaryGts, binaryPreds, gts, preds):
        uniformBGt, uniformBPred, emptyGt, emptyPred = _check_correct_array_values(binGt, binPred, gt, pred)

        # Drawn once per observation (even if unused) so the same index is shared by every patched vector.
        i = rng.integers(0, len(binGt))

        if classScores:
            if uniformBGt:
                someUnifBGt = True
                # Copy before writing: the row may be a view into an array owned by the caller.
                binGt = binGt.copy()
                binGt[i] = int(not binGt[i])
            if uniformBPred:
                someUnifBPred = True
                binPred = binPred.copy()
                binPred[i] = int(not binPred[i])

        if realScores:
            if emptyGt:
                someEmptyGt = True
                gt = gt.copy()
                gt[i] += 1e-4
            if emptyPred:
                someEmptyPred = True
                pred = pred.copy()
                pred[i] += 1e-4

        ret.append([_individual_fi_metrics(gt, pred, binGt, binPred, metric, predsNegative, thresholdType)
                    for metric in metrics])

    if verbose:
        if someUnifBGt:
            warnings.warn('A binary ground truth contains uniform values, so one entry has been randomly flipped '
                          'for the metrics to be defined.')
        if someUnifBPred:
            warnings.warn('A binary prediction contains uniform values, so one entry has been randomly flipped '
                          'for the metrics to be defined.')
        if someEmptyGt:
            warnings.warn('A ground truth does not contain values != 0, so 1e-4 has been added to one random entry '
                          'in both.')
        if someEmptyPred:
            warnings.warn('A prediction does not contain values != 0, so 1e-4 has been added to one random entry '
                          'in both.')

    return np.array(ret).astype(np.float32)
def feature_importance_scores(gts, preds, metrics=None, average: bool = True, thresholdType: str = 'abs',
                              binThreshold: float = 0.5, verbose: bool = True):
    """ Computes quality metrics between one or more feature importance vectors. The values in the vectors must be
    bounded in [0, 1] or [-1, 1] (to indicate negative importances in the second case). If they are not, the values will
    be mapped.

    For the computation of the precision, recall and FScore, the vectors are binarized to simulate a classification
    setting depending on the param. :code:`thresholdType`. In the case of ROC AUC, the ground truth feature importance
    vector will be binarized as in the case of 'precision', 'recall' and 'FScore' and the predicted feature importance
    vector entries will be considered as prediction scores. If the predicted vectors contain negative values, these will
    be either mapped to 0 or replaced by their absolute value (depending on the chosen option in the param.
    :code:`thresholdType`).

    **Edge cases**: Edge cases for when metrics are not defined have been accounted for:

        * When computing classification scores ('fscore', 'prec', 'rec'), if there is only one class in the
          ground truth and / or the prediction, one random feature will be flipped (same feature in both).
          Note that some metrics such as 'auc' may still be undefined in this case if there is only 1 feature per data
          observation.
        * For 'auc', although the ground truth is binarized, the prediction vector represents scores, and so, if both
          contain only one value, only in the ground truth a feature will be flipped. In the prediction, a small amount
          (:math:`10^{-4}`) will be added to a random feature if no value is != 0.
        * When computing cosine similarity, if there is no value != 0 in the ground truth and / or prediction, 1e-4
          will be added to one random feature.

    **On vector ranges**: If the ground truth array or the predicted array have values that are not bounded in
    :math:`[-1, 1]` or :math:`[0, 1]`, they will be mapped accordingly. Note that if the values lie within
    :math:`[-1, 1]` or :math:`[0, 1]` no mapping will be performed, so it is assumed that the scores represent feature
    importances in those ranges. These are the cases considered for the mapping:

        * if values in the :math:`[0, \\infty]` range: map to :math:`[0, 1]`
        * if values in the :math:`[-\\infty, 0]` range: map to :math:`[-1, 1]`
        * if values in the :math:`[-\\infty, \\infty]` range: map to :math:`[-1, 1]`

    :param np.ndarray gts: (1d np.ndarray or 2d np.ndarray of shape (n_samples, n_features)) ground truth feature
        importance vectors. Each row is treated as one observation.
    :param np.ndarray preds: (1d np.ndarray or 2d np.ndarray of shape (n_samples, n_features)) predicted feature
        importance vectors. Each row is treated as one observation.
    :param metrics:
        (str or array-like of str) metric/s to be computed. Defaults to ``['fscore']``. Available metrics are

            - 'fscore': Computes the F1 Score between the ground truths and the predicted vectors.
            - 'prec': Computes the Precision Score between the ground truths and the predicted vectors.
            - 'rec': Computes the Recall Score between the ground truths and the predicted vectors.
            - 'auc': Computes the ROC AUC Score between the ground truths and the predicted vectors.
            - 'cs': Computes the Cosine Similarity between the ground truths and the predicted vectors.

        The vectors are automatically binarized for computing recall, precision and fscore.
    :param bool average: (default :code:`True`) Used only if :code:`gts` and :code:`preds` contain multiple
        observations. Should the computed metrics be averaged across all the samples?
    :param str thresholdType: Options for the binarization of the features for the computation of 'fscore', 'prec',
        'rec' and 'auc'.

            - 'abs': features with absolute val <= :code:`binThreshold` will be set to 0 and 1 otherwise. For the
              predicted feature importances in the case of 'auc', their absolute val will be taken.
            - 'thres': features <= :code:`binThreshold` will be set to 0, 1 otherwise. For the `predicted` feature
              importances in the case of 'auc', negative values will be cast to 0 and the others left *as-is*.
    :param float binThreshold:
        (in [-1, 1]) Threshold for the binarization of the features for the computation of 'fscore', 'prec', 'rec' and
        'auc'. The binarization depends on both this parameter and :code:`thresholdType`.
        If :code:`thresholdType = 'abs'`, ``binThreshold`` cannot be negative.
    :param bool verbose: Verbosity of warnings. ``True`` will report warnings, ``False`` will not.
    :raises MetricNotAvailableError: if any requested metric is not an available one.
    :return: (ndarray of shape (n_metrics,) or (n_samples, n_metrics)) specified metric/s in the indicated order.
        With a single observation the result is squeezed; an empty ``metrics`` yields an empty array. """
    if metrics is None:
        metrics = ['fscore']
    elif isinstance(metrics, str):
        metrics = [metrics]
    elif len(metrics) == 0:
        return np.array([], dtype=np.float32)

    for metric in metrics:
        if metric not in _AVAILABLE_FEATURE_IMPORTANCE_METRICS:
            raise MetricNotAvailableError(metric)

    gts, _ = scale_fi_bounds(gts, verbose)
    preds, predsNegative = scale_fi_bounds(preds, verbose)

    # binarize if necessary
    if not np.array_equal(np.unique(gts), np.array([0, 1])):
        binaryGts = _binarize_arrays(gts, method=thresholdType, threshold=binThreshold)
    else:
        binaryGts = gts.copy()

    if not np.array_equal(np.unique(preds), np.array([0, 1])):
        binaryPreds = _binarize_arrays(preds, method=thresholdType, threshold=binThreshold)
    else:
        binaryPreds = preds.copy()

    # if we have one observation, reshape it accordingly
    if len(binaryPreds.shape) == 1:
        binaryGts, binaryPreds = binaryGts.reshape(1, -1), binaryPreds.reshape(1, -1)
        gts, preds = gts.reshape(1, -1), preds.reshape(1, -1)

    # Record which families of metrics were requested so that only the vectors those metrics actually use
    # (binary ones for classification scores, real valued ones for cosine similarity) get patched.
    classScores = any(metric in ('fscore', 'prec', 'rec', 'auc') for metric in metrics)
    realScores = any(metric == 'cs' for metric in metrics)

    ret = _compute_feature_importance_scores(binaryGts, binaryPreds, gts, preds, classScores, realScores, metrics,
                                             predsNegative, thresholdType, verbose)

    nObservations = binaryPreds.shape[0]
    # Truthiness rather than ``is True`` so that flags such as ``numpy.bool_`` are honoured as well.
    if average and nObservations > 1:
        return np.mean(ret, axis=0)
    if nObservations == 1:
        return ret.squeeze()
    return ret
def cosine_similarity(u, v, bounding: str = 'abs') -> float:
    """ Computes the cosine similarity between two real valued arrays, bounded in [0, 1].

    A negative similarity is brought back into [0, 1] according to ``bounding``: its absolute value is returned with
    'abs' (the default), and 0 is returned with 'max'.

    :param u: (array-like), real valued array of dimension n.
    :param v: (array-like), real valued array of dimension n.
    :param str bounding: if the CS is < 0, bound it in [0, 1] via absolute val ('abs') or max(0, val) ('max')
    :raises ValueError: if ``bounding`` is neither 'abs' nor 'max'.
    :return float: cosine similarity in [0, 1]. """
    # cdist yields the cosine *distance* (1 - similarity) as a 1x1 matrix for this single pair of vectors.
    similarity = 1 - cdist([u], [v], metric='cosine')[0][0]

    # float(...) keeps the return type uniform: the 'max' branch would otherwise hand back the int 0.
    if bounding == 'abs':
        return float(np.abs(similarity))
    elif bounding == 'max':
        return float(max(0, similarity))
    raise ValueError('bounding method not valid.')