- order: [3, 4] #overall avg recall at 10% - order: [5, 6] #overall_groups at 10% - order: [7, 8] #overall_groups at 30% - order: [9, 10] #director3 at 30% - order: [11, 12] #director2 at 30% - order: [13, 14] #director1 at 30% """ import yaml res = select_model.run(args['experiment_id'], yaml.load(selector), data.query('fold != "2017-12-26"')) # + import seaborn as sns selector = res['selectors'][0] fig, axis = plt.subplots(figsize=(15, 10), ncols=1, nrows=1) df = explode(selector['data']) model_name = df['name'].unique()[0] df = pd.concat([df, data.query('name in ("higher_than_x", "random")')]) df['new_name'] = df.apply(lambda x: x['name'] + '_' + str(x['learner_id']), 1) translate = { 'higher_than_x': 'Ordered by Value Baseline', 'random': 'Random Baseline',
from model_selection import plotting, select_model from utils.utils import connect_to_database con = connect_to_database() # Add here your parameters: experiment_id = 1129688681 model_selector = 'good_selector' args = dict(experiment_id=experiment_id) plotting.overall_performance_per_fold(args, thresh=0) res = select_model.run( args['experiment_id'], Path().cwd().parent / 'model_selectors' / (model_selector + '.yaml')) for df in res['selectors']: print(df['learner_stats']) good_learners = res['selectors'][3]['learner_ids'] # ## Feature Importance query = """ select feature, avg(abs(importance)) importance from experiments.feature_importances where learner_id = {learner_id} and importance != 0 group by feature
import sys
from pathlib import Path
import os

# Make the project's src/ directory importable when this notebook script is
# executed from its own folder.
source_path = str(
    Path(os.path.abspath('joaoc-model-selector.py')).parent.parent / 'src')
if source_path not in sys.path:
    sys.path.insert(0, source_path)
# sys.path.insert(0, '../utils')

import model_selection.select_model as sm

# !pwd

# NOTE(review): hard-coded experiment id and an absolute, user-specific yaml
# path -- parameterize before sharing this notebook.
res = sm.run(
    3124936745,
    '/home/joao.carabetta/Documents/dncp/model_selectors/template.yaml')

# +
import pickle

# Packages
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import seaborn as sns
# from pipeline.data_persistence import persist_local
from itertools import cycle

# Endless iterator over the 20 'tab20' palette colors so successive plots
# reuse the same color sequence.
color = cycle(cm.get_cmap('tab20', 20).colors)
type: top value: 10 statistic: std higher_is_better: false selectors: - order: [1] - order: [2] - order: [3, 4] """) metric_selected, data, max_fold = select_model.run(macro_experiment_id, selector) # ## Recall by Quality Reviews # # <div class="alert alert-block alert-info"> # The <b>Selected Models</b> line has an interval. The intervall boundaries are the minimum and maximum of all selected models. # </div> plotting.plot_metrics(metric_selected, data, max_fold, baselines, metric='recall') # ## Precision by Quality Reviews
type: threshold value: 0.01 statistic: mean higher_is_better: true selectors: - order: [1] - order: [2] - order: [3] """ res = select_model.run(args['experiment_id'], yaml.load(selector), data) # + nrows = len(res['selectors']) fig, ax = plt.subplots(figsize=(15, nrows * 10), ncols=1, nrows=nrows) for i, selector in enumerate(res['selectors']): axis = ax[i] df = explode(selector['data']) df = pd.concat([df, data.query('name in ("higher_than_x", "random")')]) df['new_name'] = df.apply(lambda x: x['name'] + '_' + str(x['learner_id']), 1)