示例#1
0
def nb_iris():
    """Build Naive Bayes models on the iris data for several Laplace values.

    Uploads ``smalldata/iris/iris_wheader.csv`` (located through
    ``tests.locate``), calls ``describe()`` on the resulting frame, and then
    trains one model per Laplace smoothing value (0, 1 and 0.25) with
    ``iris[0:4]`` as predictors and ``iris[4]`` as the response, showing each
    model after it is built.
    """
    # print() with a single argument behaves identically on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print("Importing iris_wheader.csv data...\n")
    iris = h2o.upload_file(tests.locate("smalldata/iris/iris_wheader.csv"))
    iris.describe()

    for laplace in (0, 1, 0.25):
        print("H2O Naive Bayes with Laplace smoothing = {0}".format(laplace))
        model = h2o.naive_bayes(x=iris[0:4], y=iris[4], laplace=laplace)
        model.show()
示例#2
0
def nb_iris(ip, port):
    """Build Naive Bayes models on the iris data for several Laplace values.

    Uploads ``smalldata/iris/iris_wheader.csv`` (located through
    ``h2o.locate``), calls ``describe()`` on the resulting frame, and then
    trains one model per Laplace smoothing value (0, 1 and 0.25) with
    ``iris[0:4]`` as predictors and ``iris[4]`` as the response, showing each
    model after it is built.

    Args:
        ip: Not used inside this function; kept so existing callers that
            pass it continue to work.
        port: Not used inside this function; kept for the same reason.
    """
    # print() with a single argument behaves identically on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print("Importing iris_wheader.csv data...\n")
    iris = h2o.upload_file(h2o.locate("smalldata/iris/iris_wheader.csv"))
    iris.describe()

    for laplace in (0, 1, 0.25):
        print("H2O Naive Bayes with Laplace smoothing = {0}".format(laplace))
        model = h2o.naive_bayes(x=iris[0:4], y=iris[4], laplace=laplace)
        model.show()
def nb_init_err():
    """Check that Naive Bayes rejects invalid training arguments.

    Uploads ``smalldata/iris/iris_wheader.csv`` and attempts three model
    builds that are each expected to raise: a negative ``laplace``, a
    ``min_sdev`` of zero, and a response taken from ``iris[3]`` instead of
    ``iris[4]``.

    Raises:
        AssertionError: if any of the three builds completes without
            raising.
    """
    print("Importing iris_wheader.csv data...\n")
    iris = h2o.upload_file(
        pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    # The bare attribute access ``iris.describe`` was a no-op; call it.
    iris.describe()

    # In each check the failure is raised from the ``else`` clause, outside
    # the ``try`` body.  Raising it inside the ``try`` under a bare
    # ``except`` swallowed the AssertionError, so the check could never fail.
    print("Laplace smoothing parameter is negative")
    try:
        h2o.naive_bayes(x=iris[0:4], y=iris[4], laplace=-1)
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on negative laplace training parameter")

    print("Minimum standard deviation is zero")
    try:
        h2o.naive_bayes(x=iris[0:4], y=iris[4], min_sdev=0)
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError("Expected naive bayes algo to fail on min_sdev = 0")

    print("Response column is not categorical")
    try:
        # min_sdev=0 is deliberately not passed here: the previous check
        # already expects it to fail on its own, which would mask whether
        # the response column is what gets rejected.
        h2o.naive_bayes(x=iris[0:3], y=iris[3])
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on response not categorical")
def nb_init_err():
    """Check that Naive Bayes rejects invalid training arguments.

    Uploads ``smalldata/iris/iris_wheader.csv`` and attempts three model
    builds that are each expected to raise: a negative ``laplace``, a
    ``min_sdev`` of zero, and a response taken from ``iris[3]`` instead of
    ``iris[4]``.

    Raises:
        AssertionError: if any of the three builds completes without
            raising.
    """
    print("Importing iris_wheader.csv data...\n")
    iris = h2o.upload_file(pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    # The bare attribute access ``iris.describe`` was a no-op; call it.
    iris.describe()

    # In each check the failure is raised from the ``else`` clause, outside
    # the ``try`` body.  Raising it inside the ``try`` under a bare
    # ``except`` swallowed the AssertionError, so the check could never fail.
    print("Laplace smoothing parameter is negative")
    try:
        h2o.naive_bayes(x=iris[0:4], y=iris[4], laplace=-1)
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on negative laplace training parameter")

    print("Minimum standard deviation is zero")
    try:
        h2o.naive_bayes(x=iris[0:4], y=iris[4], min_sdev=0)
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError("Expected naive bayes algo to fail on min_sdev = 0")

    print("Response column is not categorical")
    try:
        # min_sdev=0 is deliberately not passed here: the previous check
        # already expects it to fail on its own, which would mask whether
        # the response column is what gets rejected.
        h2o.naive_bayes(x=iris[0:3], y=iris[3])
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on response not categorical")
示例#5
0
def nb_prostate():
    """Train a Naive Bayes model on the prostate data and predict with it.

    Uploads ``smalldata/logreg/prostate.csv`` (located through
    ``tests.locate``), converts the CAPSULE, RACE, DCAPS and DPROS columns
    with ``asfactor()``, trains a model with ``prostate[2:9]`` as predictors
    and ``prostate[1]`` as the response (``laplace=0``), shows the model, and
    then predicts on the same frame it was trained on.
    """
    # print() with a single argument behaves identically on Python 2 and 3.
    print("Importing prostate.csv data...")
    prostate = h2o.upload_file(tests.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DCAPS, and DPROS to categorical")
    # Each column is converted from itself.  RACE used to be assigned from
    # CAPSULE, which replaced the RACE values with a copy of another column.
    for column in ('CAPSULE', 'RACE', 'DCAPS', 'DPROS'):
        prostate[column] = prostate[column].asfactor()

    print("Compare with Naive Bayes when x = 3:9, y = 2")
    prostate_nb = h2o.naive_bayes(x=prostate[2:9], y=prostate[1], laplace=0)
    prostate_nb.show()

    print("Predict on training data")
    prostate_pred = prostate_nb.predict(prostate)
    prostate_pred.head()
示例#6
0
def nb_prostate(ip, port):
    """Train a Naive Bayes model on the prostate data and predict with it.

    Calls ``h2o.init(ip, port)``, uploads ``smalldata/logreg/prostate.csv``
    (located through ``h2o.locate``), converts the CAPSULE, RACE, DCAPS and
    DPROS columns with ``asfactor()``, trains a model with ``prostate[2:9]``
    as predictors and ``prostate[1]`` as the response (``laplace=0``), shows
    the model, and then predicts on the same frame it was trained on.

    Args:
        ip: Forwarded as the first argument of ``h2o.init``.
        port: Forwarded as the second argument of ``h2o.init``.
    """
    h2o.init(ip, port)

    # print() with a single argument behaves identically on Python 2 and 3.
    print("Importing prostate.csv data...")
    prostate = h2o.upload_file(h2o.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DCAPS, and DPROS to categorical")
    # Each column is converted from itself.  RACE used to be assigned from
    # CAPSULE, which replaced the RACE values with a copy of another column.
    for column in ("CAPSULE", "RACE", "DCAPS", "DPROS"):
        prostate[column] = prostate[column].asfactor()

    print("Compare with Naive Bayes when x = 3:9, y = 2")
    prostate_nb = h2o.naive_bayes(x=prostate[2:9], y=prostate[1], laplace=0)
    prostate_nb.show()

    print("Predict on training data")
    prostate_pred = prostate_nb.predict(prostate)
    prostate_pred.head()
def nb_prostate():
    """Train a Naive Bayes model on the prostate data and predict with it.

    Uploads ``smalldata/logreg/prostate.csv`` (located through
    ``pyunit_utils.locate``), converts the CAPSULE, RACE, DCAPS and DPROS
    columns with ``asfactor()``, trains a model with ``prostate[2:9]`` as
    predictors and ``prostate[1]`` as the response (``laplace=0``), shows the
    model, and then predicts on the same frame it was trained on.
    """
    print("Importing prostate.csv data...")
    prostate = h2o.upload_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DCAPS, and DPROS to categorical")
    # Each column is converted from itself.  RACE used to be assigned from
    # CAPSULE, which replaced the RACE values with a copy of another column.
    for column in ('CAPSULE', 'RACE', 'DCAPS', 'DPROS'):
        prostate[column] = prostate[column].asfactor()

    print("Compare with Naive Bayes when x = 3:9, y = 2")
    prostate_nb = h2o.naive_bayes(x=prostate[2:9], y=prostate[1], laplace=0)
    prostate_nb.show()

    print("Predict on training data")
    prostate_pred = prostate_nb.predict(prostate)
    prostate_pred.head()
def nb_baddata():
    """Exercise Naive Bayes on degenerate training data.

    The data is built as 10 inner lists of 100 values each.  Three builds
    are expected to raise: data consisting only of ``"NA"``, data whose
    response (``train_h2o[0]``) is numeric, and data whose response is the
    constant ``"A"``.  A fourth build, where one predictor list is the
    constant 5 and the response holds random letters, is expected to succeed
    and to report 8 entries under ``output.pcond`` in the model JSON.

    Raises:
        AssertionError: if a build that should fail completes, or if the
            final model does not report 8 ``pcond`` entries.
    """
    rawdata = [[random.gauss(0, 1) for _ in range(100)] for _ in range(10)]

    # In each negative check the failure is raised from the ``else`` clause.
    # Raising it inside the ``try`` under a bare ``except`` swallowed the
    # AssertionError, so the check could never fail.
    print("Training data with all NA's")
    train = [["NA"] * 100 for _ in range(10)]
    train_h2o = h2o.H2OFrame(python_obj=train)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data of all NA's")

    # Response column must be categorical
    print("Training data with a numeric response column")
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data with a numeric response column")

    # Constant response dropped before model building
    print("Training data with a constant response: drop and throw error")
    rawdata[0] = 100 * ["A"]
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data with a constant response: drop and throw error")

    # Predictors with constant value automatically dropped
    print("Training data with 1 col of all 5's: drop automatically")
    rawdata = [[random.gauss(0, 1) for _ in range(100)] for _ in range(10)]
    rawdata[4] = 100 * [5]
    # string.ascii_letters exists on both Python 2 and 3; string.letters is
    # Python 2 only.
    rawdata[0] = [random.choice(string.ascii_letters) for _ in range(100)]
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    model = h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    n_pcond = len(model._model_json["output"]["pcond"])
    assert n_pcond == 8, "Expected 8 predictors, but got {0}".format(n_pcond)
示例#9
0
def nb_baddata():
    """Exercise Naive Bayes on degenerate training data.

    The data is built as 100 inner lists of 10 values each.  Three builds
    are expected to raise: data consisting only of ``"NA"``, data whose
    response (``train_h2o[0]``) is numeric, and data where element 0 of
    every inner list is the constant ``"A"``.  A fourth build, where element
    4 of every inner list is the constant 5 and element 0 is a random
    letter, is expected to succeed and to report 8 entries under
    ``output.pcond`` in the model JSON.

    Raises:
        AssertionError: if a build that should fail completes, or if the
            final model does not report 8 ``pcond`` entries.
    """
    rawdata = [[random.gauss(0, 1) for _ in range(10)] for _ in range(100)]

    # In each negative check the failure is raised from the ``else`` clause.
    # Raising it inside the ``try`` under a bare ``except`` swallowed the
    # AssertionError, so the check could never fail.
    print("Training data with all NA's")
    train = [["NA"] * 10 for _ in range(100)]
    train_h2o = h2o.H2OFrame(python_obj=train)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data of all NA's")

    # Response column must be categorical
    print("Training data with a numeric response column")
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data with a numeric response column")

    # Constant response dropped before model building
    print("Training data with a constant response: drop and throw error")
    for row in rawdata:
        row[0] = "A"
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    try:
        h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    except Exception:
        pass  # expected: the build must be rejected
    else:
        raise AssertionError(
            "Expected naive bayes algo to fail on training data with a constant response: drop and throw error")

    # Predictors with constant value automatically dropped
    print("Training data with 1 col of all 5's: drop automatically")
    rawdata = [[random.gauss(0, 1) for _ in range(10)] for _ in range(100)]
    for row in rawdata:
        row[4] = 5
        # string.ascii_letters exists on both Python 2 and 3; string.letters
        # is Python 2 only.
        row[0] = random.choice(string.ascii_letters)
    train_h2o = h2o.H2OFrame(python_obj=rawdata)
    model = h2o.naive_bayes(x=train_h2o[1:10], y=train_h2o[0])
    n_pcond = len(model._model_json['output']['pcond'])
    assert n_pcond == 8, "Expected 8 predictors, but got {0}".format(n_pcond)