Пример #1
0
from automlk.dataset import get_dataset_list, create_dataset_sets
from automlk.store import list_key_store, set_key_store

"""
module specifically designed to update pp sampling results
"""

# Walk the stored search rounds of every classification dataset and append an
# explicit 'SP_PASS' sampling step to each pipeline that has no 'sampling' step.
patched_count = 0
for dt in get_dataset_list():
    # only classification datasets whose status is not 'created' are processed
    if dt.status == 'created' or dt.problem_type != 'classification':
        continue

    print(dt.name)
    rounds_key = 'dataset:%s:rounds' % dt.dataset_id
    rounds = list_key_store(rounds_key)
    # store the rounds as read under the backup key before touching them
    set_key_store(rounds_key + '_backup', rounds)

    count_before = patched_count
    for search_round in rounds:
        # second item of each pipeline step is compared against 'sampling'
        step_categories = [step[1] for step in search_round['pipeline']]
        if 'sampling' not in step_categories:
            print('missing sampling step')
            search_round['pipeline'].append(('SP_PASS', 'sampling', 'No re-sampling', {}))
            patched_count += 1

    # write the rounds back only when at least one pipeline was changed
    if patched_count != count_before:
        set_key_store(rounds_key, rounds)

print('modified pipelines:', patched_count)


Пример #2
0
            all_cat.append((c, pd.merge(best, counts, on='cat_ref').to_dict(orient='records')))

    return all_cat

"""


def __select_cat(c, pipeline):
    """Return the first element of pipeline whose second item equals c.

    When no element matches, a tuple of four empty strings is returned.
    """
    not_found = ('', '', '', '')
    return next((step for step in pipeline if step[1] == c), not_found)


# For every dataset: make sure its 'graphs_dark' folder exists, then (unless the
# dataset is still in 'created' status) graph the search history for levels 1 and 2.
for dt in get_dataset_list(include_results=True):
    # check graph folders; exist_ok=True creates the folder only when missing and
    # avoids the check-then-create race of a separate os.path.exists() test
    os.makedirs(get_dataset_folder(dt.dataset_id) + '/graphs_dark', exist_ok=True)
    # generate best rounds
    if dt.status != 'created':
        print(dt.name)
        # get search history
        df = get_search_rounds(dt.dataset_id)

        # generate graphs: best models are split by their 'level' column and
        # one history graph is produced per level
        best = __get_best_models(df)
        best1 = best[best.level == 1]
        best2 = best[best.level == 2]
        graph_history_search(dt, df, best1, 1)
        graph_history_search(dt, df, best2, 2)
Пример #3
0
 def set_choices(self, problem_type):
     """Set self.dataset.choices to the datasets matching problem_type.

     Each choice is a (dataset_id, label) pair, with the label formatted
     as '#<dataset_id>: <name>'.
     """
     matching = []
     for d in get_dataset_list():
         if d.problem_type == problem_type:
             label = '#%s: %s' % (d.dataset_id, d.name)
             matching.append((d.dataset_id, label))
     self.dataset.choices = matching
Пример #4
0
from automlk.dataset import get_data_folder, get_dataset_list

# print the value returned by get_data_folder(), then the name of every dataset
data_folder = get_data_folder()
print('data folder:', data_folder)
dataset_names = [dataset.name for dataset in get_dataset_list()]
print('list of datasets:', dataset_names)
Пример #5
0
from automlk.dataset import get_data_folder, get_dataset_list

# print the value returned by get_data_folder(), then the raw dataset list
data_folder = get_data_folder()
print('data folder:', data_folder)
datasets = get_dataset_list()
print('list of datasets:', datasets)
Пример #6
0
from automlk.store import *
from automlk.dataset import get_dataset_ids, get_dataset, get_dataset_list

# dump the store entries held under the 'dataset:counter' and 'dataset:list' keys
print('dataset:counter', get_key_store('dataset:counter'))
print('dataset:list', list_key_store('dataset:list'))

# ids as reported by the dataset module
print('dataset ids', get_dataset_ids())

# smoke check: 'dt ok' is printed only if get_dataset(1) returns without raising
dt = get_dataset(1)
print('dt ok')

# full dataset list, kept in a module-level name
l = get_dataset_list()