"""
Module specifically designed to update pp sampling results.

For every dataset whose status is not 'created' and whose problem type is
'classification', the stored search rounds are read, copied to a backup key,
and each round whose pipeline has no step tagged 'sampling' gets a pass-through
sampling step appended. The rounds are written back only when at least one
pipeline was changed.
"""
from automlk.dataset import get_dataset_list, create_dataset_sets
from automlk.store import list_key_store, set_key_store

modified_count = 0
for dataset in get_dataset_list():
    # skip datasets still in 'created' status and every non-classification one
    if dataset.status == 'created' or dataset.problem_type != 'classification':
        continue

    print(dataset.name)
    rounds_key = 'dataset:%s:rounds' % dataset.dataset_id
    rounds = list_key_store(rounds_key)

    # NOTE(review): the backup key is rewritten on every run, so a second run
    # presumably replaces the pristine backup with already-patched rounds -
    # confirm against the store semantics before re-running.
    set_key_store('dataset:%s:rounds_backup' % dataset.dataset_id, rounds)

    patched = 0
    for search_round in rounds:
        steps = search_round['pipeline']
        # a step is identified as sampling by its second element
        sampling_steps = [step for step in steps if step[1] == 'sampling']
        if sampling_steps:
            continue
        print('missing sampling step')
        steps.append(('SP_PASS', 'sampling', 'No re-sampling', {}))
        patched += 1

    modified_count += patched
    if patched:
        # persist only when something actually changed for this dataset
        set_key_store(rounds_key, rounds)

print('modified pipelines:', modified_count)
all_cat.append((c, pd.merge(best, counts, on='cat_ref').to_dict(orient='records'))) return all_cat """ def __select_cat(c, pipeline): # select the element in the pipeline with category c for p in pipeline: if p[1] == c: return p return '', '', '', '' for dt in get_dataset_list(include_results=True): # check graph folders if not os.path.exists(get_dataset_folder(dt.dataset_id) + '/graphs_dark'): os.makedirs(get_dataset_folder(dt.dataset_id) + '/graphs_dark') # generate best rounds if dt.status != 'created': print(dt.name) # get search history df = get_search_rounds(dt.dataset_id) # generate graphs best = __get_best_models(df) best1 = best[best.level == 1] best2 = best[best.level == 2] graph_history_search(dt, df, best1, 1) graph_history_search(dt, df, best2, 2)
def set_choices(self, problem_type):
    """
    fill the choices of the dataset field with the datasets of a problem type

    :param problem_type: only datasets whose problem_type equals this value are listed
    :return: None (self.dataset.choices is set to a list of
             (dataset_id, '#<dataset_id>: <name>') tuples)
    """
    options = []
    for d in get_dataset_list():
        if d.problem_type != problem_type:
            continue
        label = '#%s: %s' % (d.dataset_id, d.name)
        options.append((d.dataset_id, label))
    self.dataset.choices = options
# Diagnostic script: print the data folder, then the name of every dataset.
from automlk.dataset import get_data_folder, get_dataset_list

data_folder = get_data_folder()
print('data folder:', data_folder)

# collect the names first so that they are printed as a single list
dataset_names = []
for dataset in get_dataset_list():
    dataset_names.append(dataset.name)
print('list of datasets:', dataset_names)
# Diagnostic script: print the data folder, then the dataset list as returned
# by get_dataset_list() (the objects themselves, not their names).
from automlk.dataset import get_data_folder, get_dataset_list

folder = get_data_folder()
print('data folder:', folder)

datasets = get_dataset_list()
print('list of datasets:', datasets)
# Diagnostic script: dump the dataset-related entries of the key store, then
# check that dataset 1 and the full dataset list can be loaded.
from automlk.store import *
from automlk.dataset import get_dataset_ids, get_dataset, get_dataset_list

# each entry is printed with the key it was read from as label
counter_value = get_key_store('dataset:counter')
print('dataset:counter', counter_value)

stored_list = list_key_store('dataset:list')
print('dataset:list', stored_list)

known_ids = get_dataset_ids()
print('dataset ids', known_ids)

# 'dt ok' is only reached when get_dataset(1) did not raise
dt = get_dataset(1)
print('dt ok')

l = get_dataset_list()