Source code for aif360.metrics.sample_distortion_metric

from functools import partial
import numpy as np
import scipy.spatial.distance as scdist

from aif360.datasets import StructuredDataset
from aif360.metrics import DatasetMetric, utils


[docs]class SampleDistortionMetric(DatasetMetric): """Class for computing metrics based on two StructuredDatasets.""" def __init__(self, dataset, distorted_dataset, unprivileged_groups=None, privileged_groups=None): """ Args: dataset (StructuredDataset): A StructuredDataset. distorted_dataset (StructuredDataset): A StructuredDataset. privileged_groups (list(dict)): Privileged groups. Format is a list of `dicts` where the keys are `protected_attribute_names` and the values are values in `protected_attributes`. Each `dict` element describes a single group. See examples for more details. unprivileged_groups (list(dict)): Unprivileged groups in the same format as `privileged_groups`. Raises: TypeError: `dataset` and `distorted_dataset` must be :obj:`~aif360.datasets.StructuredDataset` types. """ # sets self.dataset, self.unprivileged_groups, self.privileged_groups super(SampleDistortionMetric, self).__init__(dataset, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups) if isinstance(distorted_dataset, StructuredDataset): self.distorted_dataset = distorted_dataset else: raise TypeError("'distorted_dataset' should be a StructuredDataset") with dataset.temporarily_ignore('features', 'labels', 'scores'): if dataset != distorted_dataset: raise ValueError("The two datasets may differ in features and " "labels/scores only.") def total(self, dist, privileged): distance, weights = dist(privileged=privileged, returned=True) return np.sum(distance * weights) def average(self, dist, privileged): distance, weights = dist(privileged=privileged, returned=True) return np.average(distance, weights=weights) def maximum(self, dist, privileged): return np.max(dist(privileged=privileged))
[docs] def euclidean_distance(self, privileged=None, returned=False): """Compute the average Euclidean distance between the samples from the two datasets. """ condition = self._to_condition(privileged) distance, mask = utils.compute_distance(self.dataset.features, self.distorted_dataset.features, self.dataset.protected_attributes, self.dataset.protected_attribute_names, dist_fun=scdist.euclidean, condition=condition) if returned: return distance, self.dataset.instance_weights[mask] return distance
[docs] def manhattan_distance(self, privileged=None, returned=False): """Compute the average Manhattan distance between the samples from the two datasets. """ condition = self._to_condition(privileged) distance, mask = utils.compute_distance(self.dataset.features, self.distorted_dataset.features, self.dataset.protected_attributes, self.dataset.protected_attribute_names, dist_fun=scdist.cityblock, condition=condition) if returned: return distance, self.dataset.instance_weights[mask] return distance
[docs] def mahalanobis_distance(self, privileged=None, returned=False): """Compute the average Mahalanobis distance between the samples from the two datasets. """ condition = self._to_condition(privileged) X_orig = self.dataset.features X_distort = self.distorted_dataset.features dist_fun = partial(scdist.mahalanobis, VI=np.linalg.inv(np.cov(np.vstack([X_orig, X_distort]).T)).T) distance, mask = utils.compute_distance(X_orig, X_distort, self.dataset.protected_attributes, self.dataset.protected_attribute_names, dist_fun=dist_fun, condition=condition) if returned: return distance, self.dataset.instance_weights[mask] return distance
def total_euclidean_distance(self, privileged=None): return self.total(self.euclidean_distance, privileged=privileged) def total_manhattan_distance(self, privileged=None): return self.total(self.manhattan_distance, privileged=privileged) def total_mahalanobis_distance(self, privileged=None): return self.total(self.mahalanobis_distance, privileged=privileged) def average_euclidean_distance(self, privileged=None): return self.average(self.euclidean_distance, privileged=privileged) def average_manhattan_distance(self, privileged=None): return self.average(self.manhattan_distance, privileged=privileged) def average_mahalanobis_distance(self, privileged=None): return self.average(self.mahalanobis_distance, privileged=privileged) def maximum_euclidean_distance(self, privileged=None): return self.maximum(self.euclidean_distance, privileged=privileged) def maximum_manhattan_distance(self, privileged=None): return self.maximum(self.manhattan_distance, privileged=privileged) def maximum_mahalanobis_distance(self, privileged=None): return self.maximum(self.mahalanobis_distance, privileged=privileged)
[docs] def mean_euclidean_distance_difference(self, privileged=None): """Difference of the averages.""" return self.difference( self.average(self.euclidean_distance, privileged=privileged))
[docs] def mean_manhattan_distance_difference(self, privileged=None): """Difference of the averages.""" return self.difference( self.average(self.manhattan_distance, privileged=privileged))
[docs] def mean_mahalanobis_distance_difference(self, privileged=None): """Difference of the averages.""" return self.difference( self.average(self.mahalanobis_distance, privileged=privileged))
[docs] def mean_euclidean_distance_ratio(self, privileged=None): """Ratio of the averages.""" return self.ratio( self.average(self.euclidean_distance, privileged=privileged))
[docs] def mean_manhattan_distance_ratio(self, privileged=None): """Ratio of the averages.""" return self.ratio( self.average(self.manhattan_distance, privileged=privileged))
[docs] def mean_mahalanobis_distance_ratio(self, privileged=None): """Ratio of the averages.""" return self.ratio( self.average(self.mahalanobis_distance, privileged=privileged))