def rf_predict(input_dict):
    """Run a distributed random forest prediction and wrap its result.

    Args:
        input_dict: mapping that must provide the keys ``"dataset"`` and
            ``"fitmodel_url"``; both values are handed unchanged to
            ``distributed_random_forest.predict``.

    Returns:
        A dict with the single key ``"string"`` holding whatever
        ``distributed_random_forest.predict`` returned.

    Raises:
        KeyError: if ``input_dict`` lacks ``"dataset"`` or ``"fitmodel_url"``.
    """
    # Function-scope import, exactly as in the original snippet.
    from discomll.ensemble import distributed_random_forest as forest

    data = input_dict["dataset"]
    model_url = input_dict["fitmodel_url"]
    result = forest.predict(data, fitmodel_url=model_url, save_results=True)
    return {"string": result}
def rf_predict(input_dict):
    """Predict with a previously fitted distributed random forest.

    ``input_dict["dataset"]`` is passed as the data to predict on and
    ``input_dict["fitmodel_url"]`` as the ``fitmodel_url`` keyword of
    ``distributed_random_forest.predict``; ``save_results`` is always True.

    Returns a dict of the form ``{"string": <value returned by predict>}``.
    A missing key in ``input_dict`` raises ``KeyError``.
    """
    from discomll.ensemble import distributed_random_forest

    dataset = input_dict["dataset"]
    options = {
        "fitmodel_url": input_dict["fitmodel_url"],
        "save_results": True,
    }
    return {"string": distributed_random_forest.predict(dataset, **options)}
# Example script: fit a distributed random forest on the lymphography training
# chunks, then run prediction on the lymphography test chunks.
from discomll import dataset
from discomll.ensemble import distributed_random_forest

# Reader settings that the train and test datasets have in common.
shared_options = dict(
    data_type="gzip",
    generate_urls=True,
    id_index=0,
    y_index=1,
    delimiter=",",
)

# One entry per selected column (18, matching range(2, 20)).
# NOTE(review): presumably "d" = discrete and "c" = continuous -- confirm
# against the discomll documentation.
feature_kinds = ["d"] * 8 + ["c"] * 2 + ["d"] * 7 + ["c"]

train = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/lymphography/train/xaaaaa.gz",
        "http://ropot.ijs.si/data/lymphography/train/xaaabj.gz",
    ],
    X_indices=range(2, 20),
    X_meta=list(feature_kinds),  # each dataset gets its own list object
    **shared_options
)

test = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/lymphography/test/xaaaaa.gz",
        "http://ropot.ijs.si/data/lymphography/test/xaaabj.gz",
    ],
    X_indices=range(2, 20),
    X_meta=list(feature_kinds),
    **shared_options
)

# Hyper-parameters forwarded verbatim to distributed_random_forest.fit.
forest_settings = dict(
    trees_per_chunk=3,
    max_tree_nodes=50,
    min_samples_leaf=10,
    min_samples_split=5,
    class_majority=1,
    measure="info_gain",
    accuracy=1,
    separate_max=True,
    random_state=None,
    save_results=True,
)
fit_model = distributed_random_forest.fit(train, **forest_settings)

predict_url = distributed_random_forest.predict(test, fit_model)
# Parenthesised single-argument print: same output as the Python 2 statement.
print(predict_url)
from disco.core import result_iterator from discomll import dataset from discomll.ensemble import distributed_random_forest from discomll.utils import model_view from discomll.utils import accuracy train = dataset.Data(data_tag=[ ["http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"]], id_index=0, X_indices=xrange(1, 10), X_meta="http://ropot.ijs.si/data/datasets_meta/breastcancer_meta.csv", y_index=10, delimiter=",") fit_model = distributed_random_forest.fit(train, trees_per_chunk=3, max_tree_nodes=50, min_samples_leaf=10, min_samples_split=5, class_majority=1, measure="info_gain", accuracy=1, separate_max=True, random_state=None, save_results=True) print model_view.output_model(fit_model) # predict training dataset predictions = distributed_random_forest.predict(train, fit_model) # output results for k, v in result_iterator(predictions): print k, v # measure accuracy ca = accuracy.measure(train, predictions) print ca
delimiter=",") test = dataset.Data(data_tag=[ "http://ropot.ijs.si/data/lymphography/test/xaaaaa.gz", "http://ropot.ijs.si/data/lymphography/test/xaaabj.gz" ], data_type="gzip", generate_urls=True, X_indices=range(2, 20), id_index=0, y_index=1, X_meta=[ "d", "d", "d", "d", "d", "d", "d", "d", "c", "c", "d", "d", "d", "d", "d", "d", "d", "c" ], delimiter=",") fit_model = distributed_random_forest.fit(train, trees_per_chunk=3, max_tree_nodes=50, min_samples_leaf=10, min_samples_split=5, class_majority=1, measure="info_gain", accuracy=1, separate_max=True, random_state=None, save_results=True) predict_url = distributed_random_forest.predict(test, fit_model) print predict_url
]], id_index=0, X_indices=xrange(1, 10), X_meta="http://ropot.ijs.si/data/datasets_meta/breastcancer_meta.csv", y_index=10, delimiter=",") fit_model = distributed_random_forest.fit(train, trees_per_chunk=3, max_tree_nodes=50, min_samples_leaf=10, min_samples_split=5, class_majority=1, measure="info_gain", accuracy=1, separate_max=True, random_state=None, save_results=True) print model_view.output_model(fit_model) # predict training dataset predictions = distributed_random_forest.predict(train, fit_model) # output results for k, v in result_iterator(predictions): print k, v # measure accuracy ca = accuracy.measure(train, predictions) print ca
# Example script: fit a distributed random forest on the segmentation training
# chunks, then predict the segmentation test chunks with diff=1.
from discomll import dataset
from discomll.ensemble import distributed_random_forest

# Reader settings that the train and test datasets have in common.
shared_options = dict(
    data_type="gzip",
    generate_urls=True,
    id_index=0,
    y_index=1,
    delimiter=",",
)

# range(2, 21) selects 19 columns; every one of them gets the "c" marker.
# NOTE(review): presumably "c" means continuous -- confirm against discomll.
feature_count = 21 - 2

train = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/segmentation/train/xaaaaa.gz",
        "http://ropot.ijs.si/data/segmentation/train/xaaabj.gz",
    ],
    X_indices=range(2, 21),
    X_meta=["c"] * feature_count,
    **shared_options
)

test = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/segmentation/test/xaaaaa.gz",
        "http://ropot.ijs.si/data/segmentation/test/xaaabj.gz",
    ],
    X_indices=range(2, 21),
    X_meta=["c"] * feature_count,
    **shared_options
)

# Hyper-parameters forwarded verbatim to distributed_random_forest.fit.
forest_settings = dict(
    trees_per_chunk=3,
    max_tree_nodes=50,
    min_samples_leaf=10,
    min_samples_split=5,
    class_majority=1,
    measure="info_gain",
    accuracy=1,
    separate_max=True,
    random_state=None,
    save_results=True,
)
fit_model = distributed_random_forest.fit(train, **forest_settings)

predictions = distributed_random_forest.predict(test, fit_model, diff=1)
# Parenthesised single-argument print: same output as the Python 2 statement.
print(predictions)