
Auditioner

The Auditioner class is the main entry point for the Audition module. Users pass its constructor a database connection, information about the model groups to be evaluated, and a specification for a filter to prune the worst-performing models.

Other methods allow users to define more complex selection rules, list selected models, or plot results from the selection process.
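
For orientation, here is a minimal, hypothetical usage sketch. The connection string, model group ids, and train end times are placeholders to be replaced with values from your own results schema:

```
from sqlalchemy import create_engine

from triage.component.audition import Auditioner

# Placeholder connection string; point this at the database holding your results schema
engine = create_engine("postgresql://localhost/triage_results")

aud = Auditioner(
    db_engine=engine,
    model_group_ids=[1, 2, 3],                     # candidate model groups (placeholders)
    train_end_times=["2016-01-01", "2017-01-01"],  # times every group has evaluations for
    initial_metric_filters=[{
        "metric": "precision@",
        "parameter": "100_abs",
        "max_from_best": 0.5,   # permissive first pass; tighten later via update_metric_filters
        "threshold_value": 0.0,
    }],
    directory="audition_plots",  # where plots and result files are written
)
aud.plot_model_groups()
```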

Attributes#

logger = verboselogs.VerboseLogger(__name__) module-attribute #

Classes#

AuditionRunner #

Source code in src/triage/component/audition/__init__.py
class AuditionRunner:
    def __init__(self, config_dict, db_engine, directory=None):
        self.dir = directory
        self.config = config_dict
        self.db_engine = db_engine

    def run(self):
        pre_aud = PreAudition(self.db_engine)
        model_group_ids = pre_aud.get_model_groups(self.config["model_groups"]["query"])
        query_end_times = self.config["time_stamps"]["query"].format(
            ", ".join(map(str, model_group_ids))
        )
        end_times = pre_aud.get_train_end_times(query=query_end_times)

        aud = Auditioner(
            db_engine=self.db_engine,
            model_group_ids=model_group_ids,
            train_end_times=end_times,
            initial_metric_filters=[
                {
                    "metric": self.config["filter"]["metric"],
                    "parameter": self.config["filter"]["parameter"],
                    "max_from_best": self.config["filter"]["max_from_best"],
                    "threshold_value": self.config["filter"]["threshold_value"],
                }
            ],
            models_table=self.config["filter"]["models_table"],
            distance_table=self.config["filter"]["distance_table"],
            directory=self.dir,
            agg_type=self.config["filter"].get("agg_type") or 'worst',
        )

        aud.plot_model_groups()
        aud.register_selection_rule_grid(rule_grid=self.config["rules"], plot=True)
        aud.save_result_model_group_ids()

        logger.debug(f"Audition ran! Results are stored in {self.dir}.")

    def validate(self):
        try:
            logger.debug("Validate!")
        except Exception as err:
            raise err
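
The run() method above reads a fixed set of keys from config_dict. The following is a hedged sketch of a matching configuration; the queries and the triage_metadata schema name are illustrative assumptions, not requirements of this class, and `engine` is a SQLAlchemy engine as in the sketch near the top of this page:

```
config = {
    "model_groups": {
        # Query returning the candidate model_group_ids
        "query": "select distinct model_group_id from triage_metadata.models",
    },
    "time_stamps": {
        # run() fills the '{}' placeholder with the comma-separated model group ids
        "query": (
            "select distinct train_end_time from triage_metadata.models "
            "where model_group_id in ({})"
        ),
    },
    "filter": {
        "metric": "precision@",
        "parameter": "100_abs",
        "max_from_best": 0.5,
        "threshold_value": 0.0,
        "models_table": "models",
        "distance_table": "best_distance",
        # "agg_type" may be omitted; 'worst' is then used
    },
    "rules": [{
        "shared_parameters": [{"metric": "precision@", "parameter": "100_abs"}],
        "selection_rules": [{"name": "best_current_value"}],
    }],
}

AuditionRunner(config_dict=config, db_engine=engine, directory="audition_output").run()
```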

Attributes#

config = config_dict instance-attribute #
db_engine = db_engine instance-attribute #
dir = directory instance-attribute #

Functions#

__init__(config_dict, db_engine, directory=None) #
Source code in src/triage/component/audition/__init__.py
def __init__(self, config_dict, db_engine, directory=None):
    self.dir = directory
    self.config = config_dict
    self.db_engine = db_engine
run() #
Source code in src/triage/component/audition/__init__.py
def run(self):
    pre_aud = PreAudition(self.db_engine)
    model_group_ids = pre_aud.get_model_groups(self.config["model_groups"]["query"])
    query_end_times = self.config["time_stamps"]["query"].format(
        ", ".join(map(str, model_group_ids))
    )
    end_times = pre_aud.get_train_end_times(query=query_end_times)

    aud = Auditioner(
        db_engine=self.db_engine,
        model_group_ids=model_group_ids,
        train_end_times=end_times,
        initial_metric_filters=[
            {
                "metric": self.config["filter"]["metric"],
                "parameter": self.config["filter"]["parameter"],
                "max_from_best": self.config["filter"]["max_from_best"],
                "threshold_value": self.config["filter"]["threshold_value"],
            }
        ],
        models_table=self.config["filter"]["models_table"],
        distance_table=self.config["filter"]["distance_table"],
        directory=self.dir,
        agg_type=self.config["filter"].get("agg_type") or 'worst',
    )

    aud.plot_model_groups()
    aud.register_selection_rule_grid(rule_grid=self.config["rules"], plot=True)
    aud.save_result_model_group_ids()

    logger.debug(f"Audition ran! Results are stored in {self.dir}.")
validate() #
Source code in src/triage/component/audition/__init__.py
def validate(self):
    try:
        logger.debug("Validate!")
    except Exception as err:
        raise err

Auditioner #

Source code in src/triage/component/audition/__init__.py
class Auditioner:
    def __init__(
        self,
        db_engine,
        model_group_ids,
        train_end_times,
        initial_metric_filters,
        models_table=None,
        distance_table=None,
        directory=None,
        agg_type='worst',
        baseline_model_group_ids=None,
    ):
        """Filter model groups using a two-step process:

        1. Broad thresholds to filter out truly bad models
        2. A selection rule grid to find the best model groups over time
            for each of a variety of methods

        This is achieved by creating a 'best distance' table, which functions like a
        denormalized 'model group/model/evaluations', storing for each
        model group/train end time/metric/parameter:
            1. the raw evaluation value,
            2. the distance of that evaluation metric from the best model group at that train time,
            3. and the distance of the metric from the best model group the *next* train time

        Each of the steps is computed based on the data in this table, and an iterative process of
            sending thresholding/selection configuration and viewing the results.

        For step 1, the initial configuration is sent in this constructor
            (as 'initial_metric_filters', format detailed below), future iterations of this
            configuration are sent to 'update_metric_filters'.

        For step 2, all configuration is sent to the object via 'register_selection_rule_grid',
            and its format is detailed in that method's docstring

        Args:
            db_engine (sqlalchemy.engine): A database engine with access to a
                results schema of a completed modeling run
            model_group_ids (list): A large list of model groups to audition. No effort should
                be needed to pick 'good' model groups, but they should all be groups that could
                be used if they are found to perform well. They should also each have evaluations
                for any train end times you wish to include in analysis
            train_end_times (list): A list of train end times that all of the given model groups
                contain evaluations for and that you want to be deemed important in the analysis
            initial_metric_filters (list): A list of metrics to filter model
                groups on, and how to filter them. Each entry should be a dict
                of the format:

                    {
                        'metric': 'string',
                        'parameter': 'string',
                        'max_from_best': .5,
                        'threshold_value': .5
                     }

                    metric (string): model evaluation metric, such as 'precision@'
                    parameter (string): model evaluation metric parameter,
                        such as '300_abs'
                    max_from_best (float): The maximum amount by which the given metric
                        can fall below the best value for a given train end time
                    threshold_value (float): The minimum acceptable value for the given metric
            models_table (string, optional): The name of the results schema
                models table that you want to use. Will default to 'models',
                which is also the default in triage.
            distance_table (string, optional): The name of the 'best distance' table to use.
                Will default to 'best_distance', but this can be sent if you want to avoid
                clobbering the results from a prior analysis.
            agg_type (string): Method for aggregating metric values (for instance, if there
                are multiple models at a given train_end_time with different random seeds).
                Can be: 'mean', 'best', or 'worst' (the default)
            baseline_model_group_ids (list): An optional list of model groups for baseline 
                models which will be included on all plots without being subject to filtering 
                or included as candidate models from the selection process.
        """
        self.metric_filters = initial_metric_filters
        # sort the train end times so we can reliably pick off the last time later
        self.train_end_times = sorted(train_end_times)
        self.directory = directory
        models_table = models_table or "models"
        distance_table = distance_table or "best_distance"
        self.distance_from_best_table = DistanceFromBestTable(
            db_engine=db_engine,
            models_table=models_table,
            distance_table=distance_table,
            agg_type=agg_type
        )
        self.best_distance_plotter = BestDistancePlotter(
            self.distance_from_best_table, self.directory
        )

        if baseline_model_group_ids:
            self.baseline_model_groups = model_groups_filter(
                train_end_times=train_end_times,
                initial_model_group_ids=baseline_model_group_ids,
                models_table=models_table,
                db_engine=db_engine,
            )
        else:
            self.baseline_model_groups = set([])

        self.first_pass_model_groups = model_groups_filter(
            train_end_times=train_end_times,
            initial_model_group_ids=model_group_ids,
            models_table=models_table,
            db_engine=db_engine,
        )

        self.model_group_thresholder = ModelGroupThresholder(
            distance_from_best_table=self.distance_from_best_table,
            train_end_times=train_end_times,
            initial_model_group_ids=self.first_pass_model_groups,
            initial_metric_filters=initial_metric_filters,
        )
        self.model_group_performance_plotter = ModelGroupPerformancePlotter(
            self.distance_from_best_table, self.directory
        )

        self.selection_rule_picker = SelectionRulePicker(self.distance_from_best_table)
        self.selection_rule_plotter = SelectionRulePlotter(
            self.selection_rule_picker, self.directory
        )
        self.selection_rule_performance_plotter = SelectionRulePerformancePlotter(
            self.selection_rule_picker, directory
        )

        # note we populate the distance from best table using both the
        # baseline and candidate model groups
        self.distance_from_best_table.create_and_populate(
            self.first_pass_model_groups | self.baseline_model_groups, 
            self.train_end_times, 
            self.metrics
        )
        self.results_for_rule = {}

    @property
    def metrics(self):
        return [
            {"metric": f["metric"], "parameter": f["parameter"]}
            for f in self.metric_filters
        ]

    @property
    def thresholded_model_group_ids(self) -> list:
        """The model group thresholder will have a varying list of model group ids
        depending on its current thresholding rules, this is a reference to whatever
        that current list is.

        Returns:
            list of model group ids allowed by the current metric threshold rules
        """
        return self.model_group_thresholder.model_group_ids

    @property
    def average_regret_for_rules(self) -> dict:
        """
        Returns the average regret for each selection rule, over the specified list of train/test periods.

        Returns:
            A dict keyed by metric/parameter string, each value mapping a selection
            rule's descriptive name to its average regret. Structure:

                {'precision@100_abs': {'descriptive rule_name': .5}}
        """
        result = dict()
        for k in self.results_for_rule.keys():
            result[k] = (
                self.results_for_rule[k]
                .groupby("selection_rule")["regret"]
                .mean()
                .to_dict()
            )
        return result

    @property
    def selection_rule_model_group_ids(self) -> dict:
        """
        Calculate the current winners for each selection rule and the most recent date

        Returns:
            A dict with a key-value pair for each selection rule and the list of n
            model_group_ids that it selected. Structure:

                {'descriptive rule_name':[1,2,3]}
        """
        logger.debug("Calculating selection rule picks for all rules")
        model_group_ids = dict()
        thresholded_ids = self.thresholded_model_group_ids
        for selection_rule in self.selection_rules:
            logger.debug("Calculating selection rule picks for %s", selection_rule)
            model_group_ids[
                selection_rule.descriptive_name
            ] = self.selection_rule_picker.model_group_from_rule(
                bound_selection_rule=selection_rule,
                model_group_ids=thresholded_ids,
                # evaluate the selection rules for the most recent
                # time period and use those as candidate model groups
                train_end_time=self.train_end_times[-1],
            )
            logger.debug(
                "For rule %s, model group %s was picked",
                selection_rule,
                model_group_ids[selection_rule.descriptive_name],
            )
        return model_group_ids

    def save_result_model_group_ids(self, fname="results_model_group_ids.json"):
        with open(os.path.join(self.directory, fname), "w") as f:
            f.write(json.dumps(self.selection_rule_model_group_ids))

    def plot_model_groups(self):
        """Display model group plots, one of the below for each configured metric.

        1. A cumulative plot showing the effect of different worse-than-best
        thresholds for the given metric across the thresholded model groups.

        2. A performance-over-time plot showing the value for the given
        metric over time for each thresholded model group
        """
        logger.debug("Showing best distance plots for all metrics")
        thresholded_model_group_ids = self.thresholded_model_group_ids
        if len(thresholded_model_group_ids) == 0:
            logger.warning(
                "Zero model group ids found that passed configured thresholds. "
                "Nothing to plot"
            )
            return
        self.best_distance_plotter.plot_all_best_dist(
            self.metrics, 
            thresholded_model_group_ids | self.baseline_model_groups, 
            self.train_end_times
        )
        logger.debug("Showing model group performance plots for all metrics")
        self.model_group_performance_plotter.plot_all(
            metric_filters=self.metric_filters,
            model_group_ids=thresholded_model_group_ids | self.baseline_model_groups,
            train_end_times=self.train_end_times,
        )

    def set_one_metric_filter(
        self,
        metric="precision@",
        parameter="100_abs",
        max_from_best=0.05,
        threshold_value=0.1,
    ):
        """Set one thresholding metric filter
        If you want to update multiple filters, use `update_metric_filters()` instead.

        Args:
            metric (string): model evaluation metric such as 'precision@'
            parameter (string): model evaluation parameter such as '100_abs'
            max_from_best (float): The maximum amount by which the given metric can fall
                below the best value for a given train end time
            threshold_value (float): The minimum acceptable value for the given metric
        """
        new_filters = [
            {
                "metric": metric,
                "parameter": parameter,
                "max_from_best": max_from_best,
                "threshold_value": threshold_value,
            }
        ]
        self.update_metric_filters(new_filters)

    def update_metric_filters(self, new_filters=None, plot=True):
        """Update the thresholding metric filters

        Args:
            new_filters (list): A list of metrics to filter model
                groups on, and how to filter them. This is an identical format to
                the list given to 'initial_metric_filters' in the constructor.
                Each entry should be a dict with the keys:
                    metric (string) -- model evaluation metric, such as 'precision@'
                    parameter (string) -- model evaluation metric parameter,
                        such as '300_abs'
                    max_from_best (float) -- The maximum amount by which the given metric
                        can fall below the best value for a given train end time
                    threshold_value (float) -- The minimum acceptable value for the given metric
            plot (boolean, default True): Whether or not to also plot model group performance
                and thresholding details at this time.
        """
        logger.debug("Updating metric filters with new config %s", new_filters)
        self.model_group_thresholder.update_filters(new_filters)
        if plot:
            logger.debug("After config update, plotting model groups")
            self.plot_model_groups()

    def plot_selection_rules(self):
        """Plot data about the configured selection rules. The three plots outlined below
        are plotted for each metric.

        We base a lot of this on the concept of the 'regret'.
        The regret refers to the difference in performance between a model group
        and the best model group for the next testing window if a selection rule is followed.

        1. A distance-next-time plot, showing the fraction of models worse than a succession of
            regret thresholds for each selection rule
        2. A regret-over-time plot for each selection rule
        3. A metric-over-time plot for each selection rule
        """
        for metric_definition in self.metrics:
            common_kwargs = dict(
                bound_selection_rules=self.selection_rules,
                regret_metric=metric_definition["metric"],
                regret_parameter=metric_definition["parameter"],
                model_group_ids=self.thresholded_model_group_ids,
                train_end_times=self.train_end_times[:-1],
                # We can't calculate regrets for the most recent train end time,
                # so don't send that in. Assumes that the train_end_times
                # are sorted in the constructor
            )
            self.selection_rule_plotter.plot_all_selection_rules(**common_kwargs)

            df = self.selection_rule_performance_plotter.generate_plot_data(
                **common_kwargs
            )
            self.selection_rule_performance_plotter.regret_plot_from_dataframe(
                metric=metric_definition["metric"],
                parameter=metric_definition["parameter"],
                df=df,
            )
            self.selection_rule_performance_plotter.raw_next_time_plot_from_dataframe(
                metric=metric_definition["metric"],
                parameter=metric_definition["parameter"],
                df=df,
            )

            key = metric_definition["metric"] + metric_definition["parameter"]
            self.results_for_rule[key] = df

    def register_selection_rule_grid(self, rule_grid, plot=True):
        """Register a grid of selection rules

        Args:
            rule_grid (list): Groups of selection rules that share parameters. See documentation below for schema.
            plot: (boolean, defaults to True) Whether or not to plot the selection
                rules at this time.

        `rule_grid` is a list of dicts. Each dict, which defines a group, has two required keys:
        `shared_parameters` and `selection_rules`.

        `shared_parameters`: A list of dicts, each with a set of parameters that are taken
        by all selection rules in this group.

        For each of these shared parameter sets, the grid will create selection rules
        combining the set with all possible selection rule/parameter combinations.

        This can be used to quickly combine, say, a variety of rules that
        all are concerned with precision at top 100 entities.

        `selection_rules`: A list of dicts, each with:

        - A 'name' attribute that matches a selection rule in audition.selection_rules
        - Parameters and values taken by that selection rule. Values in list form are
        all added to the grid. If the selection rule has no parameters, or the parameters are all covered
        by the shared parameters in this group, none are needed here.

        Each selection rule in the group must have all of its required parameters
        covered by the shared parameters in its group and the parameters given to it.

        Refer to [Selection Rules](../selection_rules/#selection-rules) for available selection rules
        and their parameters.
        The exceptions are the first two arguments to each selection rule,
        'df' and 'train_end_time'.
        These are contextual and thus provided internally by Audition.

        Example:
        ```
        [{
            'shared_parameters': [
                    {'metric': 'precision@', 'parameter': '100_abs'},
                    {'metric': 'recall@', 'parameter': '100_abs'},
                ],
                'selection_rules': [
                    {'name': 'most_frequent_best_dist',
                        'dist_from_best_case': [0.1, 0.2, 0.3]},
                    {'name': 'best_current_value'}
                ]
        }]
        ```
        """
        self.selection_rules = make_selection_rule_grid(rule_grid)
        if plot:
            self.plot_selection_rules()

Attributes#

average_regret_for_rules property #

Returns the average regret for each selection rule, over the specified list of train/test periods.

Returns:

- dict: A dict keyed by metric/parameter string, each value mapping a selection rule's descriptive name to its average regret. Structure:

    {'precision@100_abs': {'descriptive rule_name': .5}}
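
A sketch of reading this property, assuming `aud` is an Auditioner whose rule grid has been registered and plotted (plotting the selection rules is what populates the underlying results):

```
for metric_key, regrets in aud.average_regret_for_rules.items():
    # regrets maps each rule's descriptive name to its mean regret;
    # a smaller average regret means the rule tracked the best group more closely
    best_rule = min(regrets, key=regrets.get)
    print(metric_key, best_rule, regrets[best_rule])
```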

baseline_model_groups = model_groups_filter(train_end_times=train_end_times, initial_model_group_ids=baseline_model_group_ids, models_table=models_table, db_engine=db_engine) instance-attribute #
best_distance_plotter = BestDistancePlotter(self.distance_from_best_table, self.directory) instance-attribute #
directory = directory instance-attribute #
distance_from_best_table = DistanceFromBestTable(db_engine=db_engine, models_table=models_table, distance_table=distance_table, agg_type=agg_type) instance-attribute #
first_pass_model_groups = model_groups_filter(train_end_times=train_end_times, initial_model_group_ids=model_group_ids, models_table=models_table, db_engine=db_engine) instance-attribute #
metric_filters = initial_metric_filters instance-attribute #
metrics property #
model_group_performance_plotter = ModelGroupPerformancePlotter(self.distance_from_best_table, self.directory) instance-attribute #
model_group_thresholder = ModelGroupThresholder(distance_from_best_table=self.distance_from_best_table, train_end_times=train_end_times, initial_model_group_ids=self.first_pass_model_groups, initial_metric_filters=initial_metric_filters) instance-attribute #
results_for_rule = {} instance-attribute #
selection_rule_model_group_ids property #

Calculate the current winners for each selection rule and the most recent date

Returns:

- dict: A dict with a key-value pair for each selection rule and the list of n model_group_ids that it selected. Structure:

    {'descriptive rule_name': [1, 2, 3]}
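
For example (the rule name shown is illustrative; actual descriptive names come from the registered grid):

```
winners = aud.selection_rule_model_group_ids
# e.g. {'some descriptive rule name': [42]}
for rule_name, group_ids in winners.items():
    print(rule_name, group_ids)
```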

selection_rule_performance_plotter = SelectionRulePerformancePlotter(self.selection_rule_picker, directory) instance-attribute #
selection_rule_picker = SelectionRulePicker(self.distance_from_best_table) instance-attribute #
selection_rule_plotter = SelectionRulePlotter(self.selection_rule_picker, self.directory) instance-attribute #
thresholded_model_group_ids property #

The model group thresholder will have a varying list of model group ids depending on its current thresholding rules, this is a reference to whatever that current list is.

Returns:

- list: The model group ids allowed by the current metric threshold rules

train_end_times = sorted(train_end_times) instance-attribute #

Functions#

__init__(db_engine, model_group_ids, train_end_times, initial_metric_filters, models_table=None, distance_table=None, directory=None, agg_type='worst', baseline_model_group_ids=None) #

Filter model groups using a two-step process:

  1. Broad thresholds to filter out truly bad models
  2. A selection rule grid to find the best model groups over time for each of a variety of methods

This is achieved by creating a 'best distance' table, which functions like a denormalized 'model group/model/evaluations' table, storing for each model group/train end time/metric/parameter:

  1. the raw evaluation value,
  2. the distance of that evaluation metric from the best model group at that train time,
  3. and the distance of the metric from the best model group at the *next* train time

Each step is computed from the data in this table through an iterative process of sending thresholding/selection configuration and viewing the results.

For step 1, the initial configuration is sent to this constructor (as 'initial_metric_filters', format detailed below); later iterations of this configuration are sent to 'update_metric_filters'.

For step 2, all configuration is sent to the object via 'register_selection_rule_grid'; its format is detailed in that method's docstring.

Parameters:

- `db_engine` (sqlalchemy.engine, required): A database engine with access to a results schema of a completed modeling run
- `model_group_ids` (list, required): A large list of model groups to audition. No effort should be needed to pick 'good' model groups, but they should all be groups that could be used if they are found to perform well. They should also each have evaluations for any train end times you wish to include in analysis
- `train_end_times` (list, required): A list of train end times that all of the given model groups contain evaluations for and that you want to be deemed important in the analysis
- `initial_metric_filters` (list, required): A list of metrics to filter model groups on, and how to filter them. Each entry should be a dict of the format:

      {
          'metric': 'string',
          'parameter': 'string',
          'max_from_best': .5,
          'threshold_value': .5
      }

  where metric (string) is a model evaluation metric, such as 'precision@'; parameter (string) is a model evaluation metric parameter, such as '300_abs'; max_from_best (float) is the maximum amount by which the given metric can fall below the best value for a given train end time; and threshold_value (float) is the minimum acceptable value for the given metric
- `models_table` (string, optional, default None): The name of the results schema models table that you want to use. Will default to 'models', which is also the default in triage
- `distance_table` (string, optional, default None): The name of the 'best distance' table to use. Will default to 'best_distance', but this can be set if you want to avoid clobbering the results from a prior analysis
- `directory` (string, optional, default None): Directory in which plots and result files are saved
- `agg_type` (string, optional, default 'worst'): Method for aggregating metric values (for instance, if there are multiple models at a given train_end_time with different random seeds). Can be 'mean', 'best', or 'worst' (the default)
- `baseline_model_group_ids` (list, optional, default None): An optional list of model groups for baseline models, which will be included on all plots without being subject to filtering or included as candidate models in the selection process
Source code in src/triage/component/audition/__init__.py
def __init__(
    self,
    db_engine,
    model_group_ids,
    train_end_times,
    initial_metric_filters,
    models_table=None,
    distance_table=None,
    directory=None,
    agg_type='worst',
    baseline_model_group_ids=None,
):
    """Filter model groups using a two-step process:

    1. Broad thresholds to filter out truly bad models
    2. A selection rule grid to find the best model groups over time
        for each of a variety of methods

    This is achieved by creating a 'best distance' table, which functions like a
    denormalized 'model group/model/evaluations', storing for each
    model group/train end time/metric/parameter:
        1. the raw evaluation value,
        2. the distance of that evaluation metric from the best model group at that train time,
        3. and the distance of the metric from the best model group the *next* train time

    Each of the steps is computed based on the data in this table, and an iterative process of
        sending thresholding/selection configuration and viewing the results.

    For step 1, the initial configuration is sent in this constructor
        (as 'initial_metric_filters', format detailed below), future iterations of this
        configuration are sent to 'update_metric_filters'.

    For step 2, all configuration is sent to the object via 'register_selection_rule_grid',
        and its format is detailed in that method's docstring

    Args:
        db_engine (sqlalchemy.engine): A database engine with access to a
            results schema of a completed modeling run
        model_group_ids (list): A large list of model groups to audition. No effort should
            be needed to pick 'good' model groups, but they should all be groups that could
            be used if they are found to perform well. They should also each have evaluations
            for any train end times you wish to include in analysis
        train_end_times (list): A list of train end times that all of the given model groups
            contain evaluations for and that you want to be deemed important in the analysis
        initial_metric_filters (list): A list of metrics to filter model
            groups on, and how to filter them. Each entry should be a dict
            of the format:

                {
                    'metric': 'string',
                    'parameter': 'string',
                    'max_from_best': .5,
                    'threshold_value': .5
                 }

                metric (string): model evaluation metric, such as 'precision@'
                parameter (string): model evaluation metric parameter,
                    such as '300_abs'
                max_from_best (float): The maximum amount by which the given metric
                    can fall below the best value for a given train end time
                threshold_value (float): The minimum acceptable value for the given metric
        models_table (string, optional): The name of the results schema
            models table that you want to use. Will default to 'models',
            which is also the default in triage.
        distance_table (string, optional): The name of the 'best distance' table to use.
            Will default to 'best_distance', but this can be sent if you want to avoid
            clobbering the results from a prior analysis.
        agg_type (string): Method for aggregating metric values (for instance, if there
            are multiple models at a given train_end_time with different random seeds).
            Can be: 'mean', 'best', or 'worst' (the default)
        baseline_model_group_ids (list): An optional list of model groups for baseline 
            models which will be included on all plots without being subject to filtering 
            or included as candidate models from the selection process.
    """
    self.metric_filters = initial_metric_filters
    # sort the train end times so we can reliably pick off the last time later
    self.train_end_times = sorted(train_end_times)
    self.directory = directory
    models_table = models_table or "models"
    distance_table = distance_table or "best_distance"
    self.distance_from_best_table = DistanceFromBestTable(
        db_engine=db_engine,
        models_table=models_table,
        distance_table=distance_table,
        agg_type=agg_type
    )
    self.best_distance_plotter = BestDistancePlotter(
        self.distance_from_best_table, self.directory
    )

    if baseline_model_group_ids:
        self.baseline_model_groups = model_groups_filter(
            train_end_times=train_end_times,
            initial_model_group_ids=baseline_model_group_ids,
            models_table=models_table,
            db_engine=db_engine,
        )
    else:
        self.baseline_model_groups = set([])

    self.first_pass_model_groups = model_groups_filter(
        train_end_times=train_end_times,
        initial_model_group_ids=model_group_ids,
        models_table=models_table,
        db_engine=db_engine,
    )

    self.model_group_thresholder = ModelGroupThresholder(
        distance_from_best_table=self.distance_from_best_table,
        train_end_times=train_end_times,
        initial_model_group_ids=self.first_pass_model_groups,
        initial_metric_filters=initial_metric_filters,
    )
    self.model_group_performance_plotter = ModelGroupPerformancePlotter(
        self.distance_from_best_table, self.directory
    )

    self.selection_rule_picker = SelectionRulePicker(self.distance_from_best_table)
    self.selection_rule_plotter = SelectionRulePlotter(
        self.selection_rule_picker, self.directory
    )
    self.selection_rule_performance_plotter = SelectionRulePerformancePlotter(
        self.selection_rule_picker, directory
    )

    # note we populate the distance from best table using both the
    # baseline and candidate model groups
    self.distance_from_best_table.create_and_populate(
        self.first_pass_model_groups | self.baseline_model_groups, 
        self.train_end_times, 
        self.metrics
    )
    self.results_for_rule = {}
plot_model_groups() #

Display model group plots, one of the below for each configured metric.

  1. A cumulative plot showing the effect of different worse-than-best thresholds for the given metric across the thresholded model groups.

  2. A performance-over-time plot showing the value for the given metric over time for each thresholded model group

Source code in src/triage/component/audition/__init__.py
def plot_model_groups(self):
    """Display model group plots, one of the below for each configured metric.

    1. A cumulative plot showing the effect of different worse-than-best
    thresholds for the given metric across the thresholded model groups.

    2. A performance-over-time plot showing the value for the given
    metric over time for each thresholded model group
    """
    logger.debug("Showing best distance plots for all metrics")
    thresholded_model_group_ids = self.thresholded_model_group_ids
    if len(thresholded_model_group_ids) == 0:
        logger.warning(
            "Zero model group ids found that passed configured thresholds. "
            "Nothing to plot"
        )
        return
    self.best_distance_plotter.plot_all_best_dist(
        self.metrics, 
        thresholded_model_group_ids | self.baseline_model_groups, 
        self.train_end_times
    )
    logger.debug("Showing model group performance plots for all metrics")
    self.model_group_performance_plotter.plot_all(
        metric_filters=self.metric_filters,
        model_group_ids=thresholded_model_group_ids | self.baseline_model_groups,
        train_end_times=self.train_end_times,
    )
plot_selection_rules() #

Plot data about the configured selection rules. The three plots outlined below are plotted for each metric.

We base a lot of this on the concept of the 'regret'. The regret refers to the difference in performance between a model group and the best model group for the next testing window if a selection rule is followed.

  1. A distance-next-time plot, showing the fraction of models worse than a succession of regret thresholds for each selection rule
  2. A regret-over-time plot for each selection rule
  3. A metric-over-time plot for each selection rule
Source code in src/triage/component/audition/__init__.py
def plot_selection_rules(self):
    """Plot data about the configured selection rules. The three plots outlined below
    are plotted for each metric.

    We base a lot of this on the concept of the 'regret'.
    The regret refers to the difference in performance between a model group
    and the best model group for the next testing window if a selection rule is followed.

    1. A distance-next-time plot, showing the fraction of models worse than a succession of
        regret thresholds for each selection rule
    2. A regret-over-time plot for each selection rule
    3. A metric-over-time plot for each selection rule
    """
    for metric_definition in self.metrics:
        common_kwargs = dict(
            bound_selection_rules=self.selection_rules,
            regret_metric=metric_definition["metric"],
            regret_parameter=metric_definition["parameter"],
            model_group_ids=self.thresholded_model_group_ids,
            train_end_times=self.train_end_times[:-1],
            # We can't calculate regrets for the most recent train end time,
            # so don't send that in. Assumes that the train_end_times
            # are sorted in the constructor
        )
        self.selection_rule_plotter.plot_all_selection_rules(**common_kwargs)

        df = self.selection_rule_performance_plotter.generate_plot_data(
            **common_kwargs
        )
        self.selection_rule_performance_plotter.regret_plot_from_dataframe(
            metric=metric_definition["metric"],
            parameter=metric_definition["parameter"],
            df=df,
        )
        self.selection_rule_performance_plotter.raw_next_time_plot_from_dataframe(
            metric=metric_definition["metric"],
            parameter=metric_definition["parameter"],
            df=df,
        )

        key = metric_definition["metric"] + metric_definition["parameter"]
        self.results_for_rule[key] = df
register_selection_rule_grid(rule_grid, plot=True) #

Register a grid of selection rules

Parameters:

- `rule_grid` (list, required): Groups of selection rules that share parameters. See documentation below for schema.
- `plot` (boolean, default True): Whether or not to plot the selection rules at this time.

rule_grid is a list of dicts. Each dict, which defines a group, has two required keys: shared_parameters and selection_rules.

shared_parameters: A list of dicts, each with a set of parameters that are taken by all selection rules in this group.

For each of these shared parameter sets, the grid will create selection rules combining the set with all possible selection rule/parameter combinations.

This can be used to quickly combine, say, a variety of rules that all are concerned with precision at top 100 entities.

selection_rules: A list of dicts, each with:

  • A 'name' attribute that matches a selection rule in audition.selection_rules
  • Parameters and values taken by that selection rule. Values in list form are all added to the grid. If the selection rule has no parameters, or the parameters are all covered by the shared parameters in this group, none are needed here.

Each selection rule in the group must have all of its required parameters covered by the shared parameters in its group and the parameters given to it.

Refer to Selection Rules for available selection rules and their parameters. The exceptions are the first two arguments to each selection rule, 'df' and 'train_end_time'. These are contextual and thus provided internally by Audition.

Example:

[{
    'shared_parameters': [
            {'metric': 'precision@', 'parameter': '100_abs'},
            {'metric': 'recall@', 'parameter': '100_abs'},
        ],
        'selection_rules': [
            {'name': 'most_frequent_best_dist',
                'dist_from_best_case': [0.1, 0.2, 0.3]},
            {'name': 'best_current_value'}
        ]
}]

Source code in src/triage/component/audition/__init__.py
def register_selection_rule_grid(self, rule_grid, plot=True):
    """Register a grid of selection rules

    Args:
        rule_grid (list): Groups of selection rules that share parameters. See documentation below for schema.
        plot: (boolean, defaults to True) Whether or not to plot the selection
            rules at this time.

    `rule_grid` is a list of dicts. Each dict, which defines a group, has two required keys:
    `shared_parameters` and `selection_rules`.

    `shared_parameters`: A list of dicts, each with a set of parameters that are taken
    by all selection rules in this group.

    For each of these shared parameter sets, the grid will create selection rules
    combining the set with all possible selection rule/parameter combinations.

    This can be used to quickly combine, say, a variety of rules that
    all are concerned with precision at top 100 entities.

    `selection_rules`: A list of dicts, each with:

    - A 'name' attribute that matches a selection rule in audition.selection_rules
    - Parameters and values taken by that selection rule. Values in list form are
    all added to the grid. If the selection rule has no parameters, or the parameters are all covered
    by the shared parameters in this group, none are needed here.

    Each selection rule in the group must have all of its required parameters
    covered by the shared parameters in its group and the parameters given to it.

    Refer to [Selection Rules](../selection_rules/#selection-rules) for available selection rules
    and their parameters.
    The exceptions are the first two arguments to each selection rule,
    'df' and 'train_end_time'.
    These are contextual and thus provided internally by Audition.

    Example:
    ```
    [{
        'shared_parameters': [
                {'metric': 'precision@', 'parameter': '100_abs'},
                {'metric': 'recall@', 'parameter': '100_abs'},
            ],
            'selection_rules': [
                {'name': 'most_frequent_best_dist',
                    'dist_from_best_case': [0.1, 0.2, 0.3]},
                {'name': 'best_current_value'}
            ]
    }]
    ```
    """
    self.selection_rules = make_selection_rule_grid(rule_grid)
    if plot:
        self.plot_selection_rules()
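
As a usage sketch, registering a grid like the example above and reading back the winners (assumes `aud` is a constructed Auditioner):

```
rule_grid = [{
    "shared_parameters": [
        {"metric": "precision@", "parameter": "100_abs"},
    ],
    "selection_rules": [
        {"name": "most_frequent_best_dist", "dist_from_best_case": [0.1, 0.2, 0.3]},
        {"name": "best_current_value"},
    ],
}]
aud.register_selection_rule_grid(rule_grid, plot=True)
print(aud.selection_rule_model_group_ids)  # winners per rule at the latest train end time
```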
save_result_model_group_ids(fname='results_model_group_ids.json') #
Source code in src/triage/component/audition/__init__.py
def save_result_model_group_ids(self, fname="results_model_group_ids.json"):
    with open(os.path.join(self.directory, fname), "w") as f:
        f.write(json.dumps(self.selection_rule_model_group_ids))
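
This writes the winners chosen by each registered selection rule as JSON into the Auditioner's directory, for example:

```
aud.save_result_model_group_ids()                    # -> <directory>/results_model_group_ids.json
aud.save_result_model_group_ids(fname="picks.json")  # or choose another file name
```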
set_one_metric_filter(metric='precision@', parameter='100_abs', max_from_best=0.05, threshold_value=0.1) #

Set one thresholding metric filter. If you want to update multiple filters, use update_metric_filters() instead.

Parameters:

- `metric` (string, default 'precision@'): model evaluation metric, such as 'precision@'
- `parameter` (string, default '100_abs'): model evaluation metric parameter, such as '100_abs'
- `max_from_best` (float, default 0.05): The maximum amount by which the given metric can fall below the best value for a given train end time
- `threshold_value` (float, default 0.1): The minimum acceptable value for the given metric
Source code in src/triage/component/audition/__init__.py
def set_one_metric_filter(
    self,
    metric="precision@",
    parameter="100_abs",
    max_from_best=0.05,
    threshold_value=0.1,
):
    """Set one thresholding metric filter
    If you want to update multiple filters, use `update_metric_filters()` instead.

    Args:
        metric (string): model evaluation metric such as 'precision@'
        parameter (string): model evaluation parameter such as '100_abs'
        max_from_best (float): The maximum amount by which the given metric can fall
            below the best value for a given train end time
        threshold_value (float): The minimum acceptable value for the given metric
    """
    new_filters = [
        {
            "metric": metric,
            "parameter": parameter,
            "max_from_best": max_from_best,
            "threshold_value": threshold_value,
        }
    ]
    self.update_metric_filters(new_filters)
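
A quick iteration sketch; note that this replaces the entire active filter set with the single filter given (it delegates to update_metric_filters, which re-plots by default):

```
aud.set_one_metric_filter(
    metric="precision@",
    parameter="100_abs",
    max_from_best=0.05,
    threshold_value=0.1,
)
```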
update_metric_filters(new_filters=None, plot=True) #

Update the thresholding metric filters

Parameters:

- `new_filters` (list): A list of metrics to filter model groups on, and how to filter them. This is an identical format to the list given to 'initial_metric_filters' in the constructor. Each entry should be a dict with the keys: metric (string), a model evaluation metric such as 'precision@'; parameter (string), a model evaluation metric parameter such as '300_abs'; max_from_best (float), the maximum amount by which the given metric can fall below the best value for a given train end time; and threshold_value (float), the minimum acceptable value for the given metric
- `plot` (boolean, default True): Whether or not to also plot model group performance and thresholding details at this time.

Source code in src/triage/component/audition/__init__.py
def update_metric_filters(self, new_filters=None, plot=True):
    """Update the thresholding metric filters

    Args:
        new_filters (list): A list of metrics to filter model
            groups on, and how to filter them. This is an identical format to
            the list given to 'initial_metric_filters' in the constructor.
            Each entry should be a dict with the keys:

                metric (string) -- model evaluation metric, such as 'precision@'
                parameter (string) -- model evaluation metric parameter,
                    such as '300_abs'
                max_from_best (float) -- The maximum amount by which the given metric
                    can fall below the best value for a given train end time
                threshold_value (float) -- The minimum acceptable value for the given metric
        plot (boolean, default True): Whether or not to also plot model group performance
            and thresholding details at this time.
    """
    logger.debug("Updating metric filters with new config %s", new_filters)
    self.model_group_thresholder.update_filters(new_filters)
    if plot:
        logger.debug("After config update, plotting model groups")
        self.plot_model_groups()
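
A sketch of tightening two filters at once while skipping the plots (the values are illustrative):

```
aud.update_metric_filters(
    new_filters=[
        {"metric": "precision@", "parameter": "100_abs",
         "max_from_best": 0.2, "threshold_value": 0.4},
        {"metric": "recall@", "parameter": "100_abs",
         "max_from_best": 0.2, "threshold_value": 0.1},
    ],
    plot=False,  # call plot_model_groups() later to inspect the result
)
```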

Functions#