示例#1
0
def get_train_set(dataset_id):
    """Serve the training-set file of a dataset as a download.

    The download is named ``train_<dataset_id>.<ext>`` where the
    extension is taken from the stored training filename.
    """
    dataset = get_dataset(dataset_id)
    train_path = __path_filename(dataset.filename_train)
    # keep whatever comes after the last dot as the extension
    extension = train_path.rsplit('.', 1)[-1]
    download_name = 'train_%s.%s' % (dataset_id, extension)
    # NOTE(review): attachment_filename was renamed download_name in
    # Flask 2.0 and removed in 2.2 — confirm the project's Flask version.
    return send_file(train_path,
                     as_attachment=True,
                     attachment_filename=download_name)
示例#2
0
def column(dataset_id, col):
    """Return one feature of a dataset as JSON.

    When *col* is the literal string "None", the first kept feature
    that is not the target column is returned (raises IndexError if
    there is none). Otherwise the feature whose name matches *col* is
    returned; an empty JSON object is returned when no name matches.
    """
    dataset = get_dataset(dataset_id)
    if col == "None":
        candidates = [
            f for f in dataset.features
            if f.to_keep and f.name != dataset.y_col
        ]
        return jsonify(candidates[0].__dict__)
    match = next((f for f in dataset.features if f.name == col), None)
    if match is not None:
        return jsonify(match.__dict__)
    # no feature with that name: empty JSON body
    return jsonify()
示例#3
0
from automlk.dataset import get_dataset
from automlk.graphs import graph_correl_features

# Regenerate the feature-correlation graph for datasets 1..6.
# NOTE(review): ids are passed as ints here while other scripts use
# string ids ('37') — confirm get_dataset accepts both.
for dataset in map(get_dataset, range(1, 7)):
    graph_correl_features(dataset)
示例#4
0
import pickle
import eli5
import pandas as pd
from eli5.sklearn import PermutationImportance
from automlk.context import get_dataset_folder
from automlk.dataset import get_dataset, get_dataset_sample

# Load the persisted artifacts of one training round for inspection.
dataset_id = '37'
round_id = '19'

dataset = get_dataset(dataset_id)

# Use context managers so every pickle file handle is closed promptly
# (the original left five file objects open for the interpreter to
# garbage-collect).
with open(get_dataset_folder(dataset_id) + '/data/eval_set.pkl', 'rb') as f:
    ds = pickle.load(f)

folder = get_dataset_folder(dataset_id) + '/models/'

# feature names used by this round's model
with open(folder + '%s_feature_names.pkl' % round_id, 'rb') as f:
    names = list(pickle.load(f))
print(names)

# model and the two pipeline stages saved alongside it
with open(folder + '%s_model.pkl' % round_id, 'rb') as f:
    model = pickle.load(f)
with open(folder + '%s_pipe_model.pkl' % round_id, 'rb') as f:
    pipe_model = pickle.load(f)
with open(folder + '%s_pipe_transform.pkl' % round_id, 'rb') as f:
    pipe_transform = pickle.load(f)

sample = get_dataset_sample(dataset_id)

# transform the sample through the preprocessing pipeline, restricted
# to the dataset's input columns
X_sample = pipe_transform.transform(pd.DataFrame(sample)[dataset.x_cols])
"""
print('-'*60)
print('test prediction pipeline')
示例#5
0
from automlk.store import *
from automlk.dataset import get_dataset_ids, get_dataset, get_dataset_list

# Quick sanity-check script for the dataset store: dumps the raw store
# keys, then exercises the higher-level accessors.

# raw store contents (key names come from automlk.store)
print('dataset:counter', get_key_store('dataset:counter'))
print('dataset:list', list_key_store('dataset:list'))

print('dataset ids', get_dataset_ids())

# fetch a single dataset by id to confirm lookup works
dt = get_dataset(1)
print('dt ok')

# full dataset listing (result unused here; presumably consumed below
# this chunk — confirm before renaming)
l = get_dataset_list()