示例#1
0
def pca_prostate(ip, port):
    """Fit PCA on the prostate data and compare projection/loading frame shapes.

    Connects to H2O, uploads ``smalldata/logreg/prostate.csv``, converts the
    CAPSULE, RACE, DPROS and DCAPS columns to factors, fits a 3-component PCA
    on the column slice ``prostate[2:9]`` and asserts that the frame returned
    by ``predict`` has the same row and column counts as the frame named by
    the model's ``loading_key``.

    :param ip: forwarded to ``h2o.init``.
    :param port: forwarded to ``h2o.init``.
    :raises AssertionError: if the row counts or the column counts differ.
    """
    h2o.init(ip, port)

    print("Importing prostate.csv data...\n")
    prostate = h2o.upload_file(h2o.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DPROS and DCAPS columns to factors")
    for column in ("CAPSULE", "RACE", "DPROS", "DCAPS"):
        prostate[column] = prostate[column].asfactor()
    prostate.describe()

    # The banner reports the settings actually passed to prcomp below.
    print("PCA on columns 3 to 9 with k = 3, transform = 'NONE', pca_method = 'Power'")
    fitPCA = h2o.prcomp(x=prostate[2:9], k=3, transform="NONE", pca_method="Power")
    pred1 = fitPCA.predict(prostate)
    pred2 = h2o.get_frame(fitPCA._model_json['output']['loading_key']['name'])

    print("Compare dimensions of projection and loading matrix")
    print("Projection matrix:\n")
    print(pred1.head())
    print("Loading matrix:\n")
    print(pred2.head())
    assert pred1.nrow() == pred2.nrow(), \
        "Expected same number of rows, but got {0} and {1}".format(pred1.nrow(), pred2.nrow())
    # This check is on columns, so the failure message must say so.
    assert pred1.ncol() == pred2.ncol(), \
        "Expected same number of columns, but got {0} and {1}".format(pred1.ncol(), pred2.ncol())
示例#2
0
def screeplot_test():
    """Fit a 4-component standardized PCA on AustraliaCoast.csv and draw scree plots.

    Uploads ``smalldata/pca_test/AustraliaCoast.csv``, fits PCA on the column
    slice ``[0:8]`` and calls ``screeplot`` once with ``type="barplot"`` and
    once with ``type="lines"``, passing ``server=True`` each time.
    """
    plot_options = {'server': True}

    coast_path = pyunit_utils.locate("smalldata/pca_test/AustraliaCoast.csv")
    coast_frame = h2o.upload_file(coast_path)
    coast_pca = h2o.prcomp(x=coast_frame[0:8], k=4, transform="STANDARDIZE")

    for plot_type in ("barplot", "lines"):
        coast_pca.screeplot(type=plot_type, **plot_options)
示例#3
0
def screeplot_test(ip,port):
    """Connect to H2O, fit PCA on AustraliaCoast.csv and draw scree plots.

    Uploads ``smalldata/pca_test/AustraliaCoast.csv``, fits a 4-component
    standardized PCA on the column slice ``[0:8]`` and calls ``screeplot``
    with ``type="barplot"`` and then ``type="lines"``, passing
    ``server=True`` each time.

    :param ip: forwarded to ``h2o.init``.
    :param port: forwarded to ``h2o.init``.
    """
    # Open the H2O connection before any frame is uploaded.
    h2o.init(ip, port)
    plot_options = {'server': True}

    coast_path = h2o.locate("smalldata/pca_test/AustraliaCoast.csv")
    coast_frame = h2o.upload_file(coast_path)
    coast_pca = h2o.prcomp(x=coast_frame[0:8], k=4, transform="STANDARDIZE")

    for plot_type in ("barplot", "lines"):
        coast_pca.screeplot(type=plot_type, **plot_options)
示例#4
0
def screeplot_test(ip, port):
    """Fit a 4-component standardized PCA on AustraliaCoast.csv and draw scree plots.

    Uploads ``smalldata/pca_test/AustraliaCoast.csv``, fits PCA on the column
    slice ``[0:8]`` and calls ``screeplot`` with ``type="barplot"`` and then
    ``type="lines"``, passing ``server=True`` each time.

    :param ip: not used by this body.
    :param port: not used by this body.
    """
    plot_options = {'server': True}

    coast_path = h2o.locate("smalldata/pca_test/AustraliaCoast.csv")
    coast_frame = h2o.upload_file(coast_path)
    coast_pca = h2o.prcomp(x=coast_frame[0:8], k=4, transform="STANDARDIZE")

    for plot_type in ("barplot", "lines"):
        coast_pca.screeplot(type=plot_type, **plot_options)
示例#5
0
def pca_arrests(ip, port):
    """Fit H2O PCA on USArrests.csv for k = 1 through 4 components.

    Uploads ``smalldata/pca_test/USArrests.csv``, describes the frame, then
    fits one PCA model per ``k`` on the column slice ``[0:4]``. The fitted
    models are not inspected; the function only checks that each fit runs.

    :param ip: not used by this body.
    :param port: not used by this body.
    """
    print("Importing USArrests.csv data...")
    arrestsH2O = h2o.upload_file(h2o.locate("smalldata/pca_test/USArrests.csv"))
    arrestsH2O.describe()

    # Iterate over k itself so the banner reports the component count that
    # is actually passed to prcomp (previously it printed k - 1).
    for k in range(1, 5):
        print("H2O PCA with " + str(k) + " dimensions:\n")
        print("Using these columns: {0}".format(arrestsH2O.names))
        h2o.prcomp(x=arrestsH2O[0:4], k=k)
示例#6
0
def pca_arrests(ip, port):
    """Fit H2O PCA on USArrests.csv for k = 1 through 4 components.

    Uploads ``smalldata/pca_test/USArrests.csv``, describes the frame, then
    fits one PCA model per ``k`` on the column slice ``[0:4]``. The fitted
    models are not inspected; the function only checks that each fit runs.

    :param ip: not used by this body.
    :param port: not used by this body.
    """
    print("Importing USArrests.csv data...")
    arrestsH2O = h2o.upload_file(
        h2o.locate("smalldata/pca_test/USArrests.csv"))
    arrestsH2O.describe()

    # Iterate over k itself so the banner reports the component count that
    # is actually passed to prcomp (previously it printed k - 1).
    for k in range(1, 5):
        print("H2O PCA with " + str(k) + " dimensions:\n")
        print("Using these columns: {0}".format(arrestsH2O.names()))
        h2o.prcomp(x=arrestsH2O[0:4], k=k)
示例#7
0
def pca_scoring():
    """Fit a demeaned 4-component PCA on USArrests.csv and print its projection.

    Uploads ``smalldata/pca_test/USArrests.csv``, fits PCA on the column slice
    ``[0:4]`` with ``transform="DEMEAN"``, calls ``predict`` on the same frame
    and prints the head of the result.
    """
    print("Importing arrests.csv data...")
    arrests_path = h2o.locate("smalldata/pca_test/USArrests.csv")
    arrests_frame = h2o.upload_file(arrests_path)

    print("Run PCA with transform = 'DEMEAN'")
    pca_model = h2o.prcomp(x=arrests_frame[0:4], k=4, transform="DEMEAN")
    # TODO: pca_model.show()

    print("Project training data into eigenvector subspace")
    projection = pca_model.predict(arrests_frame)
    print("H2O Projection:")
    print(projection.head())
def pca_scoring():
    """Fit a demeaned 4-component PCA on USArrests.csv and project the data.

    Uploads ``smalldata/pca_test/USArrests.csv``, fits PCA on the column slice
    ``[0:4]`` with ``transform="DEMEAN"``, calls ``predict`` on the same frame
    and then calls ``head()`` on the result (its return value is not printed
    here).
    """
    print("Importing arrests.csv data...")
    arrests_path = pyunit_utils.locate("smalldata/pca_test/USArrests.csv")
    arrests_frame = h2o.upload_file(arrests_path)

    print("Run PCA with transform = 'DEMEAN'")
    pca_model = h2o.prcomp(x=arrests_frame[0:4], k=4, transform="DEMEAN")
    # TODO: pca_model.show()

    print("Project training data into eigenvector subspace")
    projection = pca_model.predict(arrests_frame)
    print("H2O Projection:")
    projection.head()
def pca_prostate():
    """Fit a 3-component PCA on the prostate data and print the projection.

    Uploads ``smalldata/logreg/prostate.csv``, converts the CAPSULE, RACE,
    DPROS and DCAPS columns to factors, fits PCA on the column slice
    ``prostate[2:9]`` with ``transform="NONE"`` and ``pca_method="Power"``,
    then prints the head of the frame returned by ``predict``.
    """
    print("Importing prostate.csv data...\n")
    prostate = h2o.upload_file(tests.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DPROS and DCAPS columns to factors")
    for column in ("CAPSULE", "RACE", "DPROS", "DCAPS"):
        prostate[column] = prostate[column].asfactor()
    prostate.describe()

    # The banner reports the settings actually passed to prcomp below.
    print("PCA on columns 3 to 9 with k = 3, transform = 'NONE', pca_method = 'Power'")
    fitPCA = h2o.prcomp(x=prostate[2:9], k=3, transform="NONE", pca_method="Power")
    pred = fitPCA.predict(prostate)

    print("Projection matrix:\n")
    print(pred.head())
示例#10
0
def pca_prostate(ip, port):
    """Fit a 3-component PCA on the prostate data and print the projection.

    Uploads ``smalldata/logreg/prostate.csv``, converts the CAPSULE, RACE,
    DPROS and DCAPS columns to factors, fits PCA on the column slice
    ``prostate[2:9]`` with ``transform="NONE"`` and ``pca_method="Power"``,
    then prints the head of the frame returned by ``predict``.

    :param ip: not used by this body.
    :param port: not used by this body.
    """
    print("Importing prostate.csv data...\n")
    prostate = h2o.upload_file(h2o.locate("smalldata/logreg/prostate.csv"))

    print("Converting CAPSULE, RACE, DPROS and DCAPS columns to factors")
    for column in ("CAPSULE", "RACE", "DPROS", "DCAPS"):
        prostate[column] = prostate[column].asfactor()
    prostate.describe()

    # The banner reports the settings actually passed to prcomp below.
    print("PCA on columns 3 to 9 with k = 3, transform = 'NONE', pca_method = 'Power'")
    fitPCA = h2o.prcomp(x=prostate[2:9], k=3, transform="NONE", pca_method="Power")
    pred = fitPCA.predict(prostate)

    print("Projection matrix:\n")
    print(pred.head())