Source code for src.aequitas.fairness

import logging

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)

__author__ = "Rayid Ghani, Pedro Saleiro <saleiro@uchicago.edu>, Loren Hinkson"
__copyright__ = "Copyright \xa9 2018. The University of Chicago. All Rights Reserved."


class Fairness(object):
    """
    Evaluates fairness from precomputed bias disparities and rolls the results up
    into group-level, attribute-level, and overall fairness determinations.
    """

    def __init__(self, fair_eval=None, tau=None, fair_measures_depend=None,
                 type_parity_depend=None, high_level_fairness_depend=None):
        """
        :param fair_eval: a lambda function used to assess fairness (e.g. the 80% rule)
        :param tau: the threshold for the fair/unfair evaluation
        :param fair_measures_depend: a dictionary with fairness measures as keys and the
            corresponding input bias disparities as values
        :param type_parity_depend: a dictionary with Type I, Type II, and Equalized Odds
            fairness measures as keys and lists of their underlying bias metric parities
            as values
        :param high_level_fairness_depend: a dictionary with supervised and unsupervised
            fairness as keys and lists of their underlying metric parities as values
        """
        if not fair_eval:
            self.fair_eval = lambda tau: lambda x: np.nan if np.isnan(x) else \
                (tau <= x <= 1 / tau)
        else:
            self.fair_eval = fair_eval

        # tau is the fairness threshold and should be a real number in the
        # half-open interval (0.0, 1.0]
        if not tau:
            self.tau = 0.8
        else:
            self.tau = tau

        # Set the high-level fairness evaluation to NA (undefined) if both underlying
        # parity determinations are NA. If only one parity is NA, the evaluation is
        # determined by the defined parity.
        self.high_level_pair_eval = lambda col1, col2: lambda x: np.nan \
            if (np.isnan(x[col1]) and np.isnan(x[col2])) \
            else (x[col1] is True and x[col2] is True)

        self.high_level_single_eval = lambda col: lambda x: np.nan \
            if np.isnan(x[col]) else (x[col] is True)

        # fair_measures_depend defines the bias metrics that serve as input to the
        # fairness evaluation and the respective fairness measure; these are the
        # fairness measures supported by the current version of aequitas.
        if not fair_measures_depend:
            self.fair_measures_depend = {'Statistical Parity': 'ppr_disparity',
                                         'Impact Parity': 'pprev_disparity',
                                         'FDR Parity': 'fdr_disparity',
                                         'FPR Parity': 'fpr_disparity',
                                         'FOR Parity': 'for_disparity',
                                         'FNR Parity': 'fnr_disparity',
                                         'TPR Parity': 'tpr_disparity',
                                         'TNR Parity': 'tnr_disparity',
                                         'NPV Parity': 'npv_disparity',
                                         'Precision Parity': 'precision_disparity'}
        else:
            self.fair_measures_depend = fair_measures_depend

        # fair_measures_supported is the list of fairness measures calculated by default
        self.fair_measures_supported = list(self.fair_measures_depend.keys())

        if not type_parity_depend:
            self.type_parity_depend = {'TypeI Parity': ['FDR Parity', 'FPR Parity'],
                                       'TypeII Parity': ['FOR Parity', 'FNR Parity'],
                                       'Equalized Odds': ['FPR Parity', 'TPR Parity']}
        else:
            self.type_parity_depend = type_parity_depend

        # high_level_fairness_depend defines which input fairness measures are used to
        # calculate the high-level ones
        if not high_level_fairness_depend:
            self.high_level_fairness_depend = {
                'Unsupervised Fairness': ['Statistical Parity', 'Impact Parity'],
                'Supervised Fairness': ['TypeI Parity', 'TypeII Parity']}
        else:
            self.high_level_fairness_depend = high_level_fairness_depend
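    # Doctest-style sketch, for illustration only: with the default fair_eval and
    # tau = 0.8, a disparity value is deemed fair when it falls inside
    # [tau, 1 / tau] = [0.8, 1.25]:
    #
    #   >>> f = Fairness()
    #   >>> f.fair_eval(f.tau)(0.9)    # inside [0.8, 1.25]
    #   True
    #   >>> f.fair_eval(f.tau)(1.3)    # above 1 / 0.8 = 1.25
    #   False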
    def get_fairness_measures_supported(self, input_df):
        """
        Determine the fairness measures supported based on the columns of the input
        data frame. Without a label_value column, only the unsupervised measures
        can be evaluated.
        """
        if 'label_value' not in input_df.columns:
            self.fair_measures_supported = ['Statistical Parity', 'Impact Parity']
        return self.fair_measures_supported
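    # Illustrative sketch: a frame without a label_value column (e.g. unlabeled
    # production scores) narrows the supported measures to the unsupervised ones:
    #
    #   >>> f = Fairness()
    #   >>> f.get_fairness_measures_supported(pd.DataFrame({'score': [1, 0]}))
    #   ['Statistical Parity', 'Impact Parity']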
    def get_group_value_fairness(self, bias_df, tau=None, fair_measures_requested=None):
        """
        Calculates the fairness measures defined in the fair_measures_requested
        dictionary and adds them as columns to the input bias_df.

        :param bias_df: the output dataframe of the bias/disparity calculation methods
        :param tau: optional, the threshold for the fair/unfair evaluation
        :param fair_measures_requested: optional, a dictionary with fairness measures
            as keys and the corresponding input bias disparities as values
        :return: the bias_df dataframe with an additional column for each of the
            fairness measures defined in the fair_measures dictionary
        """
        logger.info('get_group_value_fairness...')
        if not tau:
            tau = self.tau
        if not fair_measures_requested:
            fair_measures_requested = self.fair_measures_supported

        for fair, depend in self.fair_measures_depend.items():
            if fair in fair_measures_requested:
                bias_df[fair] = bias_df[depend].apply(self.fair_eval(tau))

        for fair, depends in self.type_parity_depend.items():
            if depends[0] in bias_df.columns:
                if depends[1] in bias_df.columns:
                    bias_df[fair] = bias_df.apply(
                        self.high_level_pair_eval(depends[0], depends[1]), axis=1)
                else:
                    bias_df[fair] = bias_df.apply(
                        self.high_level_single_eval(depends[0]), axis=1)
            elif depends[1] in bias_df.columns:
                bias_df[fair] = bias_df.apply(
                    self.high_level_single_eval(depends[1]), axis=1)
            else:
                logger.warning('get_group_value_fairness: no parity measure input '
                               'found on bias_df')

        for fair, depends in self.high_level_fairness_depend.items():
            if depends[0] in bias_df.columns:
                if depends[1] in bias_df.columns:
                    bias_df[fair] = bias_df.apply(
                        self.high_level_pair_eval(depends[0], depends[1]), axis=1)
                else:
                    bias_df[fair] = bias_df.apply(
                        self.high_level_single_eval(depends[0]), axis=1)
            elif depends[1] in bias_df.columns:
                bias_df[fair] = bias_df.apply(
                    self.high_level_single_eval(depends[1]), axis=1)
        if 'Unsupervised Fairness' not in bias_df.columns and \
                'Supervised Fairness' not in bias_df.columns:
            logger.info('get_group_value_fairness: no high-level measure input '
                        'found on bias_df')
        return bias_df
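    # Worked example, for illustration only: a hypothetical bias_df row with
    # fdr_disparity = 1.1 and fpr_disparity = 2.0 evaluated at tau = 0.8 gains
    # 'FDR Parity' = True (1.1 lies inside [0.8, 1.25]) and 'FPR Parity' = False
    # (2.0 > 1.25), and therefore 'TypeI Parity' = False, since the pair
    # evaluation requires both underlying parities to be True.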
    def _fill_groupby_attribute_fairness(self, groupby_obj, key_columns,
                                         group_attribute_df, measures):
        """
        Returns a dataframe with fairness values grouped by attribute_value. For each
        measure, the row kept for a group is the one with the minimum determination
        (False sorts before True), so a single unfair attribute_value makes the whole
        attribute unfair.
        """
        logger.info('fill_groupby_attribute_fairness')
        for key in measures:
            rows = []
            for group, group_df in groupby_obj:
                if group_df[key].isnull().all():
                    row = group_df.iloc[0][key_columns + [key]]
                else:
                    # idxmin() raises a KeyError when NaNs are present, so drop them first
                    group_df = group_df[group_df[key].notnull()][key_columns + [key]]
                    row = group_df.loc[group_df[key].astype(bool).idxmin()]
                rows.append(row)
            key_df = pd.DataFrame(rows)
            if group_attribute_df.empty:
                group_attribute_df = key_df
            else:
                group_attribute_df = group_attribute_df.merge(key_df, on=key_columns)
        return group_attribute_df
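    # Worked example, for illustration only: if a hypothetical attribute 'race' has
    # determinations {'white': True, 'black': False} for some measure, then
    # astype(bool).idxmin() selects the 'black' row (False sorts before True), so
    # the row kept for the attribute carries the False determination.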
    def get_group_attribute_fairness(self, group_value_df, fair_measures_requested=None):
        """
        Determines whether the minimum value of each fairness measure in
        fair_measures_requested is False across all attribute_values of a given
        attribute_name. If any False is present, the determination for that attribute
        is False for the given fairness measure.

        :param group_value_df: the output dataframe of the get_group_value_fairness() method
        :return: a dataframe of fairness measures at the attribute level (no attribute_values)
        """
        logger.info('get_group_attribute_fairness')
        if not fair_measures_requested:
            fair_measures_requested = self.fair_measures_supported
        group_attribute_df = pd.DataFrame()
        key_columns = ['model_id', 'score_threshold', 'attribute_name']
        groupby_variable = group_value_df.groupby(key_columns)
        # The helper below filters NaNs before reducing, because idxmin() in pandas
        # raises a KeyError when the column contains a NaN.
        group_attribute_df = self._fill_groupby_attribute_fairness(
            groupby_variable, key_columns, group_attribute_df, fair_measures_requested)
        if group_attribute_df.empty:
            raise ValueError('get_group_attribute_fairness: none of the requested '
                             'fairness measures were found on the input '
                             'group_value_df columns')
        parity_cols = [col for col in self.type_parity_depend
                       if col in group_value_df.columns]
        group_attribute_df = self._fill_groupby_attribute_fairness(
            groupby_variable, key_columns, group_attribute_df, parity_cols)
        highlevel_cols = [col for col in self.high_level_fairness_depend
                          if col in group_value_df.columns]
        group_attribute_df = self._fill_groupby_attribute_fairness(
            groupby_variable, key_columns, group_attribute_df, highlevel_cols)
        return group_attribute_df
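    # Illustrative sketch, assuming a group_value_df produced by
    # get_group_value_fairness() from an upstream (hypothetical) bias_df:
    #
    #   >>> f = Fairness()
    #   >>> gv_df = f.get_group_value_fairness(bias_df)    # bias_df is hypothetical
    #   >>> ga_df = f.get_group_attribute_fairness(gv_df)
    #   # ga_df has one row per (model_id, score_threshold, attribute_name)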
    def get_overall_fairness(self, group_attribute_df):
        """
        Calculates overall fairness regardless of the group_attributes. Searches for
        False parity determinations across group_attributes and outputs True only if
        every group_attribute is fair.

        :param group_attribute_df: the output dataframe of get_group_attribute_fairness()
        :return: a dictionary of overall, unsupervised, and supervised fairness determinations
        """
        overall_fairness = {}
        if 'Unsupervised Fairness' in group_attribute_df.columns:
            overall_fairness['Unsupervised Fairness'] = \
                False if group_attribute_df['Unsupervised Fairness'].min() == False else True
        if 'Supervised Fairness' in group_attribute_df.columns:
            overall_fairness['Supervised Fairness'] = \
                False if group_attribute_df['Supervised Fairness'].min() == False else True
        fair_vals = list(overall_fairness.values())
        if False in fair_vals:
            overall_fairness['Overall Fairness'] = False
        elif True in fair_vals:
            overall_fairness['Overall Fairness'] = True
        else:
            overall_fairness['Overall Fairness'] = 'Undefined'
        return overall_fairness
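    # Illustrative sketch of the roll-up, with hypothetical attribute-level inputs:
    # any False high-level determination makes 'Overall Fairness' False, and
    # 'Undefined' appears only when neither high-level column is present:
    #
    #   >>> f.get_overall_fairness(ga_df)                  # ga_df is hypothetical
    #   {'Unsupervised Fairness': True, 'Supervised Fairness': False,
    #    'Overall Fairness': False}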
    def list_parities(self, df):
        """
        Returns the list of all parity determinations present in the columns of df.
        """
        all_fairness = self.type_parity_depend.keys() | \
                       self.high_level_fairness_depend.keys() | \
                       self.fair_measures_depend.keys()
        return list(all_fairness & set(df.columns))
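
# Minimal end-to-end sketch, for illustration only: the frame below is toy data,
# and the *_disparity columns would normally be produced by the aequitas Bias class.
if __name__ == '__main__':
    toy_bias_df = pd.DataFrame({
        'model_id': [1, 1],
        'score_threshold': ['binary 0/1', 'binary 0/1'],
        'attribute_name': ['race', 'race'],
        'attribute_value': ['white', 'black'],
        'ppr_disparity': [1.0, 0.5],     # hypothetical disparities vs. reference group
        'pprev_disparity': [1.0, 0.9],
    })
    f = Fairness()
    # Restrict both steps to the two unsupervised measures present in the toy frame;
    # warnings about missing parity inputs are expected here.
    requested = ['Statistical Parity', 'Impact Parity']
    gv_df = f.get_group_value_fairness(toy_bias_df, fair_measures_requested=requested)
    ga_df = f.get_group_attribute_fairness(gv_df, fair_measures_requested=requested)
    # ppr_disparity = 0.5 fails the 80% rule, so both determinations come out False.
    print(f.get_overall_fairness(ga_df))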