示例#1
0
文件: Demo.py 项目: simonway/PythonML
def calculateError(actual, predict):
    """Return an asymmetric exponential error score for a set of predictions.

    Both arguments are converted with ``DataFrameParser.h2oToNumpyArray`` and
    compared position by position. With ``d = predict[i] - actual[i]``:

    * ``d > 0`` (over-estimate)  adds ``exp(d / 10)``
    * ``d < 0`` (under-estimate) adds ``exp(-d / 13)``
    * ``d == 0`` adds nothing

    Over-estimates are therefore penalised more steeply than under-estimates
    of the same size, and in both directions the penalty grows with the
    magnitude of the error.

    NOTE(review): the 10/13 constants match the PHM08 RUL scoring function,
    which additionally subtracts 1 from every term - confirm whether that
    offset is wanted here.

    :param actual: frame of true values accepted by
        ``DataFrameParser.h2oToNumpyArray``.
    :param predict: frame of predicted values, same length as ``actual``.
    :return: the accumulated score (``0`` when every prediction is exact).
    """
    actual = DataFrameParser.h2oToNumpyArray(actual)
    predict = DataFrameParser.h2oToNumpyArray(predict)

    error = 0
    for i, actual_value in enumerate(actual):
        d = predict[i] - actual_value
        if d > 0:
            error += math.exp(d / 10.0)
        elif d < 0:
            # d is negative here: negate it so the penalty increases with the
            # size of the under-estimate instead of decaying towards zero.
            error += math.exp(-d / 13.0)
    return error
示例#2
0
# Feature columns: every column of the filtered pandas frame except
# 'UnitNumber', 'Time', 'RUL' and the 'BIN' column used as the response.
training_columns = list(p_filter.columns)
training_columns.remove('UnitNumber')
training_columns.remove('Time')
training_columns.remove('RUL')
training_columns.remove('BIN')

# Convert the 'BIN' response to a categorical (factor) column in both frames.
h_filter['BIN'] = h_filter['BIN'].asfactor()
h_test['BIN'] = h_test['BIN'].asfactor()

# Write the frames used for this run to CSV, overwriting existing files.
h2o.export_file(frame=h_test, path='test_sid.csv', force=True)
h2o.export_file(frame=h_filter, path='train_sid.csv', force=True)

# The number-of-trees parameter is spelled `ntrees`; the misspelt keyword
# used previously never configured the forest size.
model = H2ORandomForestEstimator(nbins=250,
                                 ntrees=100,
                                 max_depth=50,
                                 nfolds=10)
model.train(x=training_columns, y='BIN', training_frame=h_filter)

# Predict the test set and compare the predicted labels with the true ones.
predict = model.predict(test_data=h_test)
predict = DataFrameParser.h2oToList(predict['predict'])
actual = DataFrameParser.h2oToList(h_test['BIN'])

Measures.confusion_matrix(actual, predict)
print(predict)
print(actual)

# Export the trained model as a POJO together with the companion jar.
# NOTE(review): the destination is an absolute, machine-specific path.
h2o.download_pojo(model=model,
                  path="/home/wso2123/PycharmProjects/FeatureProcessor/",
                  get_jar=True)
# Predictors: all columns of the training pandas frame except the three
# names removed below (the last one, 'RUL', is the response).
training_columns = list(pTrain.columns)
for excluded in ('UnitNumber', 'Time', 'RUL'):
    training_columns.remove(excluded)

response_column = 'RUL'
print("OK")

# Gradient boosting with a Poisson distribution.
# An alternative configuration that was tried here:
#   H2ORandomForestEstimator(ntrees=50, max_depth=20, nbins=100, seed=12345)
model = H2OGradientBoostingEstimator(
    distribution='poisson',
    histogram_type='QuantilesGlobal',
    fold_assignment='auto',
)
model.train(
    x=training_columns,
    y=response_column,
    training_frame=hTrain,
    validation_frame=hValidate,
)

# Report the model's performance metrics on the held-out test frame.
print(model.model_performance(test_data=hTest))

# Convert predictions and true 'RUL' values to numpy arrays for charting.
predict = DataFrameParser.h2oToNumpyArray(model.predict(test_data=hTest))
actual = DataFrameParser.h2oToNumpyArray(hTest['RUL'])

# Diagnostic charts (project helpers; each receives actual then predicted).
Chart.residual_histogram(actual, predict)
Chart.residual_vs_estimated(actual, predict)
Chart.acutal_and_predict(actual, predict)








示例#4
0
def function():
    """Train and evaluate a deep-learning 'BIN' classifier on filtered data.

    Pipeline, in order:

    1. Load the processed training and test pandas frames through
       ``ProcessData`` and upload them as H2O frames.
    2. Train an H2O autoencoder on the training frame (excluding the target,
       identifier and setting columns) and compute its reconstruction error
       on that same frame, one value per row.
    3. Hand the training frame, the errors and a threshold of 97% of the
       maximum error to ``Filter.filterDataAutoEncoder``.
    4. Drop a fixed list of columns from the filtered frame and train an
       ``H2ODeepLearningEstimator`` with 10-fold cross-validation to
       predict 'BIN'.
    5. Predict on the test frame, pass actual/predicted labels to
       ``Measures.confusion_matrix`` and print both lists.

    Takes no arguments and returns ``None``; all results are printed.
    """
    # AutoEncoder anomaly removal process
    p_train = ProcessData.trainData(moving_median_centered_average=True,
                                    standard_deviation=True,
                                    probability_distribution=True,
                                    bin_classification=True)
    p_test = ProcessData.testData(moving_median_centered_average=True,
                                  standard_deviation=True,
                                  probability_from_file=True,
                                  bin_classification=True)

    # Converting to h2o frame. p_test is not modified afterwards, so this
    # single upload also serves the final prediction step.
    h_test = h2o.H2OFrame(p_test)
    h_test.set_names(list(p_test.columns))

    h_train = h2o.H2OFrame(p_train)
    h_train.set_names(list(p_train.columns))

    # Define autoencoder
    anomaly_model = H2OAutoEncoderEstimator(activation="Rectifier",
                                            hidden=[25, 12, 25],
                                            sparse=True,
                                            l1=1e-4,
                                            epochs=100)

    # Select relevant features: everything except the targets, the
    # identifier/time columns and the three setting columns.
    anomaly_train_columns = list(p_train.columns)
    print(anomaly_train_columns)
    for column in ('RUL', 'BIN', 'UnitNumber', 'Time',
                   'Setting1', 'Setting2', 'Setting3'):
        anomaly_train_columns.remove(column)

    # Train model
    anomaly_model.train(x=anomaly_train_columns, training_frame=h_train)

    # Get reconstruction error
    reconstruction_error = anomaly_model.anomaly(test_data=h_train,
                                                 per_feature=False)
    error_str = reconstruction_error.get_frame_data()
    # The frame is returned as text: skip the first line (header) and the
    # final element left by the trailing newline, then parse one float per row.
    err_list = np.array(list(map(float, error_str.split("\n")[1:-1])))

    # Threshold: 97% of the largest reconstruction error observed
    threshold = np.amax(err_list) * 0.97

    print("Max Reconstruction Error       :", reconstruction_error.max())
    print("Threshold Reconstruction Error :", threshold)

    # Filter anomalies based on reconstruction error
    p_filter = Filter.filterDataAutoEncoder(panda_frame=p_train,
                                            reconstruction_error=err_list,
                                            threshold=threshold)

    # Drop features
    for column in ('Setting3', 'Sensor1', 'Sensor5', 'Sensor10',
                   'Sensor16', 'Sensor18', 'Sensor19'):
        del p_filter[column]

    h_filter = h2o.H2OFrame(p_filter)
    h_filter.set_names(list(p_filter.columns))

    training_columns = list(p_filter.columns)
    for column in ('UnitNumber', 'Time', 'RUL', 'BIN'):
        training_columns.remove(column)

    # Convert the 'BIN' response to a categorical (factor) column.
    h_filter['BIN'] = h_filter['BIN'].asfactor()
    h_test['BIN'] = h_test['BIN'].asfactor()

    # nfolds is an estimator parameter, so it is configured on the
    # constructor rather than passed to train().
    model = H2ODeepLearningEstimator(variable_importances=True,
                                     nfolds=10)
    model.train(x=training_columns,
                y='BIN',
                training_frame=h_filter)

    predict = model.predict(test_data=h_test)
    predict = DataFrameParser.h2oToList(predict['predict'])
    actual = DataFrameParser.h2oToList(h_test['BIN'])

    Measures.confusion_matrix(actual, predict)
    print(predict)
    print(actual)