Пример #1
0
# Merge the JSON-encoded parameters into the keyword arguments and set the
# model id, which is built from the output folder's full name and the algorithm.
kwargs.update(json.loads(params))
kwargs['model_id'] = (
    'DSS.H2O_connector.model.' + output_folder.full_name + '.' + algorithm)

# Dispatch table: algorithm name -> h2o callable invoked with **kwargs.
algorithms = {
    'autoencoder': h2o.h2o.autoencoder,
    'deeplearning': h2o.h2o.deeplearning,
    'gbm': h2o.h2o.gbm,
    'glm': h2o.h2o.glm,
    'glrm': h2o.h2o.glrm,
    'kmeans': h2o.h2o.kmeans,
    'naive_bayes': h2o.h2o.naive_bayes,
    'prcomp': h2o.h2o.prcomp,
    'random_forest': h2o.h2o.random_forest,
    'svd': h2o.h2o.svd,
}

# NOTE: printing kwargs here was observed to make the job fail with
# "exception None" (cause unknown), so the debug print stays disabled:
# print 'Arguments passed to H2O: ', kwargs
model = algorithms[algorithm](**kwargs)

## save model summary in output_folder and model to disk
summary_path = os.path.join(output_folder_path, 'model_summary.txt')
with open(summary_path, 'w') as summary_file:
    # model.show() writes through print, so stdout is pointed at the summary
    # file for the duration of the call. try/finally guarantees stdout is
    # restored even when show() raises; otherwise every later print would
    # target a file that the with-statement has already closed.
    orig_stdout = sys.stdout
    sys.stdout = summary_file
    try:
        model.show()
    finally:
        sys.stdout = orig_stdout

h2o.h2o.save_model(model,
                   saved_model_folder(model_config, output_folder),
                   force=True)  # "force" means overwrite
Пример #2
0
## load model
def find_model_id(folder):
    """Return the model id recorded in the folder's ``model_summary.txt``.

    The file is scanned line by line for a line that starts with
    ``Model Key:  DSS.H2O_connector.model.<folder.full_name>.``; the key
    (everything after ``Model Key:  `` up to the end of that line) is
    returned for the first such line.

    Args:
        folder: object exposing ``get_path()`` (directory that contains
            ``model_summary.txt``) and a ``full_name`` string.

    Returns:
        The matched model id string.

    Raises:
        Exception: if no line of the file contains a matching model key.
    """
    # full_name is literal text, not a pattern: escape it so characters such
    # as '.' or '+' in the name are matched verbatim. The pattern is compiled
    # once instead of being rebuilt for every line.
    key_pattern = re.compile(
        r'Model Key:  (DSS\.H2O_connector\.model\.'
        + re.escape(folder.full_name)
        + r'\..*)$')
    summary_path = os.path.join(folder.get_path(), 'model_summary.txt')
    with open(summary_path) as summary_file:
        for line in summary_file:
            match = key_pattern.match(line)
            if match:
                return match.group(1)
    raise Exception('Could not find model id in model_summary.txt')

model_id = find_model_id(input_folder)

try:
    model = h2o.h2o.get_model(model_id)
except EnvironmentError as e:
    print 'Model key unknown to H2O:\n“',e,'”\n','Will thus load from saved model.'
    path = os.path.join(saved_model_folder(model_config, input_folder), model_id)
    model = h2o.h2o.load_model(path=path)

## compute predictions and add input columns to output
predict_frame = model.predict(input_frame)
print "predict_frame id:", predict_frame.frame_id
print "predict_frame types:", predict_frame.types
sys.stdout.flush()

for col in get_recipe_config()["columns_to_copy_to_output"]:
    predict_frame[col] = input_frame[col]

## save output
def DSS_type(col_name):
    return {
        'enum':'string',
Пример #3
0
        for line in file:
            match = re.match(
                'Model Key:  (DSS\.H2O_connector\.model\.' + folder.full_name +
                '\..*)$', line)
            if match:
                return match.group(1)
    raise Exception('Could not find model id in model_summary.txt')


model_id = find_model_id(input_folder)

try:
    model = h2o.h2o.get_model(model_id)
except EnvironmentError as e:
    print 'Model key unknown to H2O:\n“', e, '”\n', 'Will thus load from saved model.'
    path = os.path.join(saved_model_folder(model_config, input_folder),
                        model_id)
    model = h2o.h2o.load_model(path=path)

## compute predictions and add input columns to output
predict_frame = model.predict(input_frame)
print "predict_frame id:", predict_frame.frame_id
print "predict_frame types:", predict_frame.types
sys.stdout.flush()

for col in get_recipe_config()["columns_to_copy_to_output"]:
    predict_frame[col] = input_frame[col]


## save output
def DSS_type(col_name):
Пример #4
0
# The model id is built from the output folder's full name and the algorithm.
kwargs['model_id'] = 'DSS.H2O_connector.model.' + output_folder.full_name + '.' + algorithm


# Dispatch table: algorithm name -> h2o callable invoked with **kwargs.
algorithms = {
    'autoencoder': h2o.h2o.autoencoder,
    'deeplearning': h2o.h2o.deeplearning,
    'gbm': h2o.h2o.gbm,
    'glm': h2o.h2o.glm,
    'glrm': h2o.h2o.glrm,
    'kmeans': h2o.h2o.kmeans,
    'naive_bayes': h2o.h2o.naive_bayes,
    'prcomp': h2o.h2o.prcomp,
    'random_forest': h2o.h2o.random_forest,
    'svd': h2o.h2o.svd,
}

# NOTE: printing kwargs here was observed to make the job fail with
# "exception None" (cause unknown), so the debug print stays disabled:
# print 'Arguments passed to H2O: ', kwargs
model = algorithms[algorithm](**kwargs)

## save model summary in output_folder and model to disk
with open(os.path.join(output_folder_path, 'model_summary.txt'), 'w') as summary_file:
    # model.show() writes through print, so stdout is swapped for the summary
    # file while it runs. The finally clause restores stdout even if show()
    # raises, so later output never targets the already-closed file.
    orig_stdout = sys.stdout
    sys.stdout = summary_file
    try:
        model.show()
    finally:
        sys.stdout = orig_stdout

h2o.h2o.save_model(
    model,
    saved_model_folder(model_config, output_folder),
    force=True) # "force" means overwrite