Example #1
def bigdata_mse(request,input_dict,output_dict,widget):
    from discomll.utils import accuracy
    from disco.core import result_iterator
    from django.shortcuts import render
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_measures'
    tag = input_dict["predictions"]
    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + '.txt'
    ensure_dir(destination)

    if input_dict["dataset"].params["id_index"] == -1:
        input_dict["string"] = "ID index should be defined."
    elif not os.path.isfile(destination):  # file doesn't exist
        results = accuracy.measure(test_data=input_dict["dataset"],
                                   predictions=input_dict["predictions"],
                                   measure="mse")
        string = "Mean squared error\n"
        for k, v in result_iterator(results):
            string += str(v) + "\n"
        input_dict["string"] = string

        f = open(destination, 'w')
        f.write(str(v))
        f.close()

    else:
        string = "Mean squared error\n"
        f = open(destination, 'r')
        input_dict["string"] = string + str(f.readlines()[0])
        f.close()


    return render(request, 'visualizations/display_string.html',
                  {'widget': widget, 'input_dict': input_dict, 'output_dict': output_dict})
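
The view above relies on ensure_dir from workflows.helpers to create the cache folder before writing to it (the same helper is used in the views below). Its implementation is not shown on this page; a minimal sketch, assuming it simply creates the missing parent directory of the given file path:

import os


def ensure_dir(file_path):
    # Hypothetical sketch of workflows.helpers.ensure_dir: make sure the
    # directory that will hold file_path exists before the file is written.
    directory = os.path.dirname(file_path)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)
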
Example #2
def bigdata_ca(request, input_dict, output_dict, widget):
    from discomll.utils import accuracy
    from django.shortcuts import render
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_measures'
    tag = input_dict["predictions"]
    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + '.txt'
    ensure_dir(destination)

    if input_dict["dataset"].params["id_index"] == -1:
        input_dict["string"] = "ID index should be defined."
    elif not os.path.isfile(destination):  # file doesn't exist
        measure, acc = accuracy.measure(test_data=input_dict["dataset"],
                                        predictions=input_dict["predictions"],
                                        measure="ca")
        string = "Classification Accuracy \n"
        score = str(measure) + " " + str(acc) + "\n"
        string += score
        input_dict["string"] = string

        f = open(destination, 'w')
        f.write(score)
        f.close()

    else:
        # ca results are cached
        string = "Classification Accuracy \n"
        f = open(destination, 'r')
        input_dict["string"] = string + str(f.readlines()[0])
        f.close()

    return render(request, 'visualizations/display_string.html',
                  {'widget': widget, 'input_dict': input_dict, 'output_dict': output_dict})
Example #3
def bigdata_mse(request, input_dict, output_dict, widget):
    from discomll.utils import accuracy
    from disco.core import result_iterator
    from django.shortcuts import render
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_measures'
    tag = input_dict["predictions"]
    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + '.txt'
    ensure_dir(destination)

    if input_dict["dataset"].params["id_index"] == -1:
        input_dict["string"] = "ID index should be defined."
    elif not os.path.isfile(destination):  # file doesn't exist
        results = accuracy.measure(test_data=input_dict["dataset"],
                                   predictions=input_dict["predictions"],
                                   measure="mse")
        string = "Mean squared error\n"
        for k, v in result_iterator(results):
            string += str(v) + "\n"
        input_dict["string"] = string

        f = open(destination, 'w')
        f.write(str(v))
        f.close()

    else:
        string = "Mean squared error\n"
        f = open(destination, 'r')
        input_dict["string"] = string + str(f.readlines()[0])
        f.close()

    return render(request, 'visualizations/display_string.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #4
def bigdata_ca(request, input_dict, output_dict, widget):
    from discomll.utils import accuracy
    from django.shortcuts import render
    import os.path
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_measures'
    tag = input_dict["predictions"]
    destination = MEDIA_ROOT + '/' + folder + "/" + tag[0][6:] + '.txt'
    ensure_dir(destination)

    if input_dict["dataset"].params["id_index"] == -1:
        input_dict["string"] = "ID index should be defined."
    elif not os.path.isfile(destination):  # file doesn't exist
        measure, acc = accuracy.measure(test_data=input_dict["dataset"],
                                        predictions=input_dict["predictions"],
                                        measure="ca")
        string = "Classification Accuracy \n"
        score = str(measure) + " " + str(acc) + "\n"
        string += score
        input_dict["string"] = string

        f = open(destination, 'w')
        f.write(score)
        f.close()

    else:
        # ca results are cached
        string = "Classification Accuracy \n"
        f = open(destination, 'r')
        input_dict["string"] = string + str(f.readlines()[0])
        f.close()

    return render(request, 'visualizations/display_string.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict
    })
Example #5
from disco.core import result_iterator

from discomll import dataset
from discomll.ensemble import distributed_random_forest
from discomll.utils import model_view
from discomll.utils import accuracy

train = dataset.Data(data_tag=[
    ["http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"]],
    id_index=0,
    X_indices=xrange(1, 10),
    X_meta="http://ropot.ijs.si/data/datasets_meta/breastcancer_meta.csv",
    y_index=10,
    delimiter=",")

fit_model = distributed_random_forest.fit(train, trees_per_chunk=3, max_tree_nodes=50, min_samples_leaf=10,
                                          min_samples_split=5, class_majority=1, measure="info_gain", accuracy=1,
                                          separate_max=True, random_state=None, save_results=True)
print model_view.output_model(fit_model)

# predict training dataset
predictions = distributed_random_forest.predict(train, fit_model)

# output results
for k, v in result_iterator(predictions):
    print k, v

# measure accuracy
ca = accuracy.measure(train, predictions)
print ca
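
Note that Examples #1 and #3 above consume the object returned by accuracy.measure with result_iterator rather than printing it directly. If the installed discomll version behaves the same way for this call, the score can be read out as follows (a sketch reusing the ca variable and the result_iterator import already defined in this example):

for k, v in result_iterator(ca):
    print k, v
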
Example #6
# define test dataset
test = dataset.Data(
    data_tag=["test:breast_cancer_disc_test"],
    data_type="chunk",
    X_indices=xrange(1, 10),
    id_index=0,
    y_index=10,
    delimiter=",",
    y_map=["2", "4"],  # define mapping parameter. "2" is mapped to 1, "4" is mapped to -1.
    missing_vals=["?"],  # define missing value symbol
)

# fit model on training dataset
fit_model = linear_svm.fit(train)

# output model
model = model_view.output_model(fit_model)
print model


# start MR job to predict given test data
predictions = linear_svm.predict(test, fit_model)

# output results
for k, v in result_iterator(predictions):
    print k, v[0]

# measure accuracy
ca = accuracy.measure(test, predictions)
print ca
Example #7
from disco.core import result_iterator

from discomll import dataset
from discomll.ensemble import distributed_weighted_forest_rand
from discomll.utils import accuracy

# training dataset: the same breast cancer data as in Example #5
train = dataset.Data(data_tag=[[
    "http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
    ]],
    id_index=0,
    X_indices=xrange(1, 10),
    X_meta="http://ropot.ijs.si/data/datasets_meta/breastcancer_meta.csv",
    y_index=10,
    delimiter=",")

fit_model = distributed_weighted_forest_rand.fit(train,
                                                 trees_per_chunk=3,
                                                 max_tree_nodes=50,
                                                 min_samples_leaf=5,
                                                 min_samples_split=10,
                                                 class_majority=1,
                                                 measure="info_gain",
                                                 num_medoids=10,
                                                 accuracy=1,
                                                 separate_max=True,
                                                 random_state=None,
                                                 save_results=True)

# predict training dataset
predictions = distributed_weighted_forest_rand.predict(train, fit_model)

# output results
for k, v in result_iterator(predictions):
    print k, v[0]

# measure accuracy
ca = accuracy.measure(train, predictions)
print ca
Example #8
# define test dataset
test = dataset.Data(data_tag=["test:breast_cancer_disc_test"],
                    data_type="chunk",
                    X_indices=xrange(1, 10),
                    X_meta=["d" for i in xrange(1, 10)],
                    id_index=0,
                    y_index=10,
                    delimiter=",",
                    y_map=["2", "4"],  # define mapping parameter. "2" is mapped to 1, "4" is mapped to -1.
                    missing_vals=["?"])  # define missing value symbol

# fit model on training dataset
fit_model = naivebayes.fit(train)

# output model
model = model_view.output_model(fit_model)
print model


# start MR job to predict given test data
predictions = naivebayes.predict(test, fit_model)

# output results
for k, v in result_iterator(predictions):
    print k, v[0]

# measure accuracy
ca = accuracy.measure(test, predictions)
print ca