def deepwater_custom_lenet():
    """Train a user-supplied LeNet definition on cat_dog_mouse.csv.

    Returns immediately when Deep Water is not available. Otherwise the
    symbol returned by ``lenet(nclasses)`` is saved to a JSON file, loaded
    as a ``'user'`` network with ``gpu=False``, trained, and the training
    mean-per-class error is asserted to be below 0.1.
    """
    if not H2ODeepWaterEstimator.available():
        return

    symbol_file = "/tmp/symbol_lenet-py.json"
    csv_path = pyunit_utils.locate(
        "bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv")
    frame = h2o.import_file(csv_path)
    print(frame.head(5))

    # Column 1 is the response below; its level count is handed to lenet().
    nclasses = frame[1].nlevels()[0]

    print(
        "Creating the model architecture from scratch using the MXNet Python API"
    )
    lenet(nclasses).save(symbol_file)

    print("Importing the model architecture for training in H2O")
    settings = dict(
        epochs=50,
        learning_rate=1e-3,
        mini_batch_size=32,
        network='user',
        network_definition_file=symbol_file,
        image_shape=[28, 28],
        channels=1,
        score_interval=0,
        train_samples_per_iteration=1000,
        gpu=False,
    )
    model = H2ODeepWaterEstimator(**settings)
    model.train(x=[0], y=1, training_frame=frame)
    model.show()

    train_error = model.model_performance(train=True).mean_per_class_error()
    assert train_error < 0.1, "mean classification error is too high : " + str(train_error)
def deepwater_custom_regression():
    """Cross-validated Deep Water regression of ``loan_amnt`` on LoanStats3a.csv.

    Returns immediately when Deep Water is not available. The model is built
    with 3-fold cross-validation and the cross-validated RMSE is asserted to
    be below 10.

    ``nfolds`` is a model parameter, so it is set on the estimator (as the
    other cross-validated estimators in this file do) rather than passed to
    ``train()``.
    """
    if not H2ODeepWaterEstimator.available():
        return

    train = h2o.import_file(
        pyunit_utils.locate("bigdata/laptop/lending-club/LoanStats3a.csv"))
    response = 'loan_amnt'
    ## remove high-cardinality columns
    excluded = set([
        response, 'id', 'emp_title', 'title', 'desc', 'revol_util', 'zip_code'
    ])
    predictors = list(set(train.names) - excluded)

    print(
        "Creating the model architecture from scratch using the MXNet Python API"
    )
    net().save("/tmp/symbol-py.json")

    print("Importing the model architecture for training in H2O")
    # NOTE: the symbol file saved above is NOT handed to the estimator; the
    # original had network='user' / network_definition_file commented out,
    # so the model trained here is the hidden=[1] tanh network.
    model = H2ODeepWaterEstimator(epochs=100,
                                  learning_rate=1e-4,
                                  mini_batch_size=64,
                                  hidden=[1],
                                  activation="tanh",
                                  nfolds=3)
    model.train(x=predictors, y=response, training_frame=train)
    model.show()

    error = model.model_performance(xval=True).rmse()
    assert error < 10, "mean xval rmse is too high : " + str(error)
def deepwater_checkpoint():
    """Save a Deep Water model, wipe the cluster, reload it and resume training.

    Returns immediately when Deep Water is not available. A first model is
    trained for 50 epochs on prostate.csv, saved with ``h2o.save_model`` and
    reloaded after ``h2o.remove_all()``. The saved file is then deleted and a
    second estimator (100 epochs) is trained with ``checkpoint`` set to the
    reloaded model's id.
    """
    if not H2ODeepWaterEstimator.available():
        return

    def load_prostate():
        # Import the data and turn column 1 (the response below) into a factor.
        data = h2o.import_file(
            pyunit_utils.locate("smalldata/prostate/prostate.csv"))
        # NOTE(review): the result of drop() is discarded here, exactly as in
        # the original; if drop() is not in-place this line has no effect and
        # column 0 stays in the frame - confirm the intent.
        data.drop(0)
        data[1] = data[1].asfactor()
        return data

    # Settings shared by the initial and the resumed estimator.
    shared = dict(learning_rate=1e-5,
                  stopping_rounds=0,
                  score_duty_cycle=1,
                  train_samples_per_iteration=-1,
                  score_interval=0)

    ## build a model
    frame = load_prostate()
    print(frame.head(5))
    model = H2ODeepWaterEstimator(epochs=50, **shared)
    model.train(y=1, training_frame=frame)

    ## save the model
    model_path = h2o.save_model(model)

    ## delete everything - simulate cluster shutdown and restart
    h2o.remove_all()

    ## reimport the model and the frame
    model = h2o.load_model(model_path)
    frame = load_prostate()

    ## delete the checkpoint file
    os.remove(model_path)

    ## continue training
    model2 = H2ODeepWaterEstimator(epochs=100,
                                   checkpoint=model.model_id,
                                   **shared)
    model2.train(y=1, training_frame=frame)
    model2.show()
def deepwater_lenet():
    """Train the built-in ``'lenet'`` network on cat_dog_mouse.csv.

    Returns immediately when Deep Water is not available. After training,
    the training mean-per-class error is asserted to be below 0.1.
    """
    if not H2ODeepWaterEstimator.available():
        return

    csv_path = pyunit_utils.locate("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv")
    frame = h2o.import_file(csv_path)
    print(frame.head(5))

    lenet_model = H2ODeepWaterEstimator(
        epochs=100,
        learning_rate=1e-3,
        network='lenet',
        score_interval=0,
        train_samples_per_iteration=1000,
    )
    lenet_model.train(x=[0], y=1, training_frame=frame)
    lenet_model.show()

    train_error = lenet_model.model_performance(train=True).mean_per_class_error()
    assert train_error < 0.1, "mean classification error is too high : " + str(train_error)
def deepwater_regression():
    """Train a Deep Water regression model on cat_dog_mouse.csv.

    Returns immediately when Deep Water is not available. Column 1 is
    converted with ``asnumeric()`` and used as the response; the training
    mean absolute error is asserted to be below 0.3.
    """
    if not H2ODeepWaterEstimator.available():
        return

    csv_path = pyunit_utils.locate("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv")
    frame = h2o.import_file(csv_path)
    numeric_response = frame[1].asnumeric()
    frame[1] = numeric_response
    print(frame.head(5))

    regressor = H2ODeepWaterEstimator(epochs=50, learning_rate=1e-3)
    regressor.train(x=[0], y=1, training_frame=frame)
    regressor.show()

    train_mae = regressor.model_performance(train=True).mae()
    assert train_mae < 0.3, "mean absolute error is too high : " + str(train_mae)
def deepwater_inception_bn_feature_extraction():
    """Load a downloaded Inception-BN model and extract deep features.

    Returns immediately when Deep Water is not available. The network
    definition, parameters and mean image are downloaded into the current
    working directory, loaded with ``epochs=0`` (no training), and two
    layers are extracted with ``deepfeatures``. The column counts of both
    extractions and the shape of a squared-cosine distance frame are
    asserted.

    The three downloads share one loop, and the downloaded files are removed
    in a ``finally`` clause so they do not linger in the working directory
    when model loading or feature extraction fails.
    """
    if not H2ODeepWaterEstimator.available():
        return

    frame = h2o.import_file(pyunit_utils.locate("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv"))
    print(frame.head(5))

    base_url = ("https://raw.githubusercontent.com/h2oai/deepwater/master/"
                "mxnet/src/main/resources/deepwater/backends/mxnet/models/Inception/")
    # (local file name, remote file name)
    downloads = (
        ("model.json", "Inception_BN-symbol.json"),
        ("model.params", "Inception_BN-0039.params"),
        ("mean_224.nd", "mean_224.nd"),
    )

    created = []  # local files that may exist and must be cleaned up
    try:
        print("Downloading the model")
        for local_name, remote_name in downloads:
            created.append(local_name)
            with open(local_name, "wb") as out:
                response = get(base_url + remote_name)
                out.write(response.content)

        print("Importing the model architecture for training in H2O")
        workdir = os.getcwd()
        model = H2ODeepWaterEstimator(
            epochs=0,  ## no training - just load the state - NOTE: training for this 3-class problem wouldn't work since the model has 1k classes
            mini_batch_size=32,  ## mini-batch size is used for scoring
            ## all parameters below are needed
            network='user',
            network_definition_file=os.path.join(workdir, "model.json"),
            network_parameters_file=os.path.join(workdir, "model.params"),
            mean_image_file=os.path.join(workdir, "mean_224.nd"),
            image_shape=[224, 224],
            channels=3)
        ## must call train() to initialize the model, but it isn't training
        model.train(x=[0], y=1, training_frame=frame)

        ## Extract deep features from final layer before going into Softmax.
        extracted_features = model.deepfeatures(frame, "global_pool_output")
        extracted_features2 = model.deepfeatures(frame, "conv_5b_double_3x3_1_output")
    finally:
        ## Cleanup (before the assertions, and also on failure)
        for local_name in created:
            if os.path.exists(local_name):
                os.remove(local_name)

    print(extracted_features.ncol)
    assert extracted_features.ncol == 1024
    print(extracted_features2.ncol)
    assert extracted_features2.ncol == 10976

    ## Find the squared cosine similarity between the first 10 images and the rest
    df = extracted_features[:10, :].distance(extracted_features[10:, :], "cosine_sq")
    print(df)
    assert df.shape[0] == 257
    assert df.shape[1] == 10
def deepwater_tweets():
    """Train a five-layer Deep Water network on tweet text and labels.

    Returns immediately when Deep Water is not available. Two single-column
    files (text, label) are imported and column-bound; the training
    mean-per-class error is asserted to be below 0.1.
    """
    if not H2ODeepWaterEstimator.available():
        return

    text_path = pyunit_utils.locate("/home/arno/tweets.txt")
    label_path = pyunit_utils.locate("/home/arno/labels.txt")
    tweets = h2o.import_file(text_path, col_names=["text"], sep="|")
    labels = h2o.import_file(label_path, col_names=["label"])
    frame = tweets.cbind(labels)
    print(frame.head(5))

    # cnn = make_text_cnn(sentence_size=100, num_embed=300, batch_size=32,
    #         vocab_size=100000, dropout=dropout, with_embedding=with_embedding)
    layer_sizes = [100, 100, 100, 100, 100]
    classifier = H2ODeepWaterEstimator(epochs=50000,
                                       learning_rate=1e-3,
                                       hidden=layer_sizes)
    classifier.train(x=[0], y=1, training_frame=frame)
    classifier.show()

    train_error = classifier.model_performance(train=True).mean_per_class_error()
    assert train_error < 0.1, "mean classification error is too high : " + str(train_error)
def deepwater_lenet():
    """Train the built-in ``'lenet'`` network and check deep-feature widths.

    Returns immediately when Deep Water is not available. After training on
    cat_dog_mouse.csv, features are extracted from two named layers and the
    column count of each result is asserted (800 and 500). All cluster
    objects are removed at the end.
    """
    if not H2ODeepWaterEstimator.available():
        return

    frame = h2o.import_file(pyunit_utils.locate("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv"))
    print(frame.head(5))

    model = H2ODeepWaterEstimator(epochs=100,
                                  learning_rate=1e-3,
                                  network='lenet',
                                  score_interval=0,
                                  train_samples_per_iteration=1000)
    model.train(x=[0], y=1, training_frame=frame)

    # (layer name, expected number of columns, assertion message)
    layer_checks = (
        ("pooling1_output", 800, "extracted frame doesn't have 800 columns"),
        ("activation2_output", 500, "extracted frame doesn't have 500 columns"),
    )
    for layer_name, expected_cols, message in layer_checks:
        extracted = model.deepfeatures(frame, layer_name)
        #print(extracted.describe())
        print(extracted.ncols)
        assert extracted.ncols == expected_cols, message

    h2o.remove_all()
# deepwater_inception_resnet_v2: returns immediately when Deep Water is not
# available; otherwise imports cat_dog_mouse.csv, saves the symbol returned by
# get_symbol(nclasses) to /tmp/symbol_inception_resnet_v2-py.json, trains a
# 'user' network from that file and asserts that the training mean-per-class
# error is below 0.1.
# NOTE(review): in this single-line form it is ambiguous which constructor
# arguments following "#learning_rate=1e-3," are commented out
# (learning_rate_annealing, mini_batch_size) - confirm against the original
# multi-line layout before reformatting this block.
def deepwater_inception_resnet_v2(): if not H2ODeepWaterEstimator.available(): return frame = h2o.import_file(pyunit_utils.locate("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv")) print(frame.head(5)) nclasses = frame[1].nlevels()[0] print("Creating the model architecture from scratch using the MXNet Python API") get_symbol(nclasses).save("/tmp/symbol_inception_resnet_v2-py.json") print("Importing the model architecture for training in H2O") model = H2ODeepWaterEstimator(epochs=50, #learning_rate=1e-3, learning_rate_annealing=1e-5, mini_batch_size=16, ## provide network specific information network='user', network_definition_file="/tmp/symbol_inception_resnet_v2-py.json", image_shape=[299,299], channels=3) model.train(x=[0],y=1, training_frame=frame) model.show() error = model.model_performance(train=True).mean_per_class_error() assert error < 0.1, "mean classification error is too high : " + str(error)
def deepwater_custom_lenet_mnist():
    """Train a user-supplied LeNet definition on MNIST and score it on the test set.

    Returns immediately when Deep Water is not available. The symbol
    returned by ``lenet(nclasses)`` is saved to a JSON file and passed as
    ``network_definition_file``. The mean-per-class error on the test frame
    is asserted to be below 0.1.

    No validation frame is passed to ``train()``, so the performance that is
    printed is the one computed on ``test`` (the original printed
    ``model_performance(valid=True)``, for which no validation frame had been
    supplied). The test performance is computed once and reused for the
    assertion.
    """
    if not H2ODeepWaterEstimator.available():
        return

    train = h2o.import_file(
        pyunit_utils.locate("bigdata/laptop/mnist/train.csv.gz"))
    test = h2o.import_file(
        pyunit_utils.locate("bigdata/laptop/mnist/test.csv.gz"))

    predictors = list(range(0, 784))
    resp = 784
    train[resp] = train[resp].asfactor()
    test[resp] = test[resp].asfactor()
    nclasses = train[resp].nlevels()[0]

    print(
        "Creating the lenet model architecture from scratch using the MXNet Python API"
    )
    lenet(nclasses).save("/tmp/symbol_lenet-py.json")

    print("Importing the lenet model architecture for training in H2O")
    model = H2ODeepWaterEstimator(
        epochs=10,
        learning_rate=0.05,
        learning_rate_annealing=1e-5,
        momentum_start=0.9,
        momentum_stable=0.9,
        mini_batch_size=128,
        train_samples_per_iteration=0,
        score_duty_cycle=0,
        stopping_rounds=0,
        ignore_const_cols=False,
        network_definition_file="/tmp/symbol_lenet-py.json",
        image_shape=[28, 28],
        channels=1)
    model.train(x=predictors, y=resp, training_frame=train)
    model.show()

    # Score the held-out frame once; print it and use it for the check.
    test_performance = model.model_performance(test)
    print(test_performance)
    error = test_performance.mean_per_class_error()
    assert error < 0.1, "mean classification error on validation set is too high : " + str(
        error)
def deepwater_custom_regression():
    """Cross-validated regression of ``loan_amnt`` with a user-supplied network.

    Returns immediately when Deep Water is not available. The symbol
    returned by ``net()`` is saved to a JSON file and passed as
    ``network_definition_file``. The model uses 3-fold cross-validation and
    the cross-validated RMSE is asserted to be below 2000.
    """
    if not H2ODeepWaterEstimator.available():
        return

    loans = h2o.import_file(pyunit_utils.locate("bigdata/laptop/lending-club/LoanStats3a.csv"))
    ## only the first 39786 records are correct, the rest is from a different dataset
    loans = loans[:39786, :]

    response = 'loan_amnt'
    ## remove high-cardinality columns
    dropped = set([response, 'id', 'emp_title', 'title', 'desc', 'revol_util', 'zip_code'])
    predictors = list(set(loans.names) - dropped)

    print("Creating the model architecture from scratch using the MXNet Python API")
    PATH = "/tmp/symbol-py.json"
    symbol = net()
    symbol.save(PATH)

    print("Importing the model architecture for training in H2O")
    regressor = H2ODeepWaterEstimator(epochs=20,
                                      nfolds=3,
                                      network_definition_file=PATH)
    regressor.train(x=predictors, y=response, training_frame=loans)
    regressor.show()

    xval_rmse = regressor.model_performance(xval=True).rmse()
    assert xval_rmse < 2000, "mean xval rmse is too high : " + str(xval_rmse)
def deepwater_custom_cnn_mnist():
    """Train a user-supplied CNN definition on MNIST with a validation frame.

    Returns immediately when Deep Water is not available. The symbol
    returned by ``cnn(nclasses)`` is saved to a JSON file and loaded as a
    ``'user'`` network. The validation mean-per-class error is asserted to
    be below 0.1.
    """
    if not H2ODeepWaterEstimator.available():
        return

    symbol_file = "/tmp/symbol_custom-py.json"
    train = h2o.import_file(
        pyunit_utils.locate("bigdata/laptop/mnist/train.csv.gz"))
    test = h2o.import_file(
        pyunit_utils.locate("bigdata/laptop/mnist/test.csv.gz"))

    # Columns 0..783 are the predictors, column 784 is the response.
    predictors = list(range(0, 784))
    resp = 784
    train[resp] = train[resp].asfactor()
    test[resp] = test[resp].asfactor()
    nclasses = train[resp].nlevels()[0]

    print(
        "Creating the cnn model architecture from scratch using the MXNet Python API"
    )
    cnn(nclasses).save(symbol_file)

    print("Importing the cnn model architecture for training in H2O")
    settings = dict(
        epochs=100,
        learning_rate=1e-3,
        mini_batch_size=64,
        network='user',
        network_definition_file=symbol_file,
        image_shape=[28, 28],
        channels=1,
    )
    model = H2ODeepWaterEstimator(**settings)
    model.train(x=predictors,
                y=resp,
                training_frame=train,
                validation_frame=test)
    model.show()

    valid_error = model.model_performance(valid=True).mean_per_class_error()
    assert valid_error < 0.1, "mean classification error on validation set is too high : " + str(
        valid_error)
def cv_airlines():
    """Build a 3-fold cross-validated Deep Water model on the airlines data.

    Returns immediately when Deep Water is not available. A fully-connected
    network with dropout, momentum and early stopping is trained to predict
    ``IsDepDelayed`` and the model summary is shown.

    ``show()`` is called directly, as everywhere else in this file, instead
    of being wrapped in ``print()``.
    """
    if not H2ODeepWaterEstimator.available():
        return

    df = h2o.import_file(
        path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"))
    predictors = [
        "Year", "Month", "DayofMonth", "DayOfWeek", "CRSDepTime",
        "CRSArrTime", "UniqueCarrier", "FlightNum"
    ]
    response_col = "IsDepDelayed"

    dl = H2ODeepWaterEstimator(
        # cross-validation
        nfolds=3,
        # network (fully-connected)
        hidden=[200, 200],
        activation="Rectifier",
        # regularization
        hidden_dropout_ratios=[0.1, 0.1],
        input_dropout_ratio=0.0,
        # learning rate
        learning_rate=5e-3,
        learning_rate_annealing=1e-6,
        # momentum
        momentum_start=0.9,
        momentum_stable=0.99,
        momentum_ramp=1e7,
        # early stopping
        epochs=100,
        stopping_rounds=4,
        train_samples_per_iteration=30000,
        # score often for early stopping
        mini_batch_size=32,
        score_duty_cycle=0.25,
        score_interval=1)
    dl.train(x=predictors, y=response_col, training_frame=df)
    dl.show()
def deepwater_demo():
    """Train a three-layer Deep Water network on the ecology data set.

    Returns immediately when Deep Water is not available. The training
    frame has its ``Site`` column dropped; ``Angaus`` is converted to a
    factor in both the training and the evaluation frame and used as the
    response.
    """
    if not H2ODeepWaterEstimator.available():
        return

    response = 'Angaus'

    # Training data
    training = h2o.import_file(path=tests.locate("smalldata/gbm_test/ecology_model.csv"))
    training = training.drop('Site')
    training[response] = training[response].asfactor()
    print(training.describe())
    training.head()

    # Testing data
    evaluation = h2o.import_file(path=tests.locate("smalldata/gbm_test/ecology_eval.csv"))
    evaluation[response] = evaluation[response].asfactor()
    print(evaluation.describe())
    evaluation.head()

    # Run DeepWater (ideally, use a GPU - this would be slow on CPUs)
    estimator = H2ODeepWaterEstimator(
        epochs=50,
        hidden=[4096, 4096, 4096],
        hidden_dropout_ratios=[0.2, 0.2, 0.2],
    )
    predictor_indices = list(range(1, training.ncol))
    estimator.train(x=predictor_indices,
                    y="Angaus",
                    training_frame=training,
                    validation_frame=evaluation)
    estimator.show()
}, "parameters": { "global_step": global_step.name } }) tf.add_to_collection("meta", meta) filename = project_path + "/models/mymodel_tensorflow.meta" tf.train.export_meta_graph(filename, saver_def=saver.as_saver_def()) return (filename) network_def_path = simple_model(28, 28, 1, num_classes) # Build Deep Water MXNet Model from h2o.estimators.deepwater import H2ODeepWaterEstimator model_mnist_mymodel_tf = H2ODeepWaterEstimator( epochs=80, network_definition_file=network_def_path, backend="tensorflow", image_shape=[28, 28], channels=1, model_id="model_mnist_mymodel_tf") model_mnist_mymodel_tf.train(x=["uri"], y="label", training_frame=mnist_training, validation_frame=mnist_testing) model_mnist_mymodel_tf.show()
from __future__ import print_function import sys, os sys.path.insert(1, os.path.join("..", "..", "..")) import h2o from h2o.estimators.deepwater import H2ODeepWaterEstimator # Start and connect to H2O local cluster h2o.init() # Import CSV frame = h2o.import_file("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv") print(frame.head(5)) # Define LeNet model model = H2ODeepWaterEstimator(epochs=300, rate=1e-3, network='lenet', score_interval=0, train_samples_per_iteration=1000) # Train LeNet model on GPU model.train(x=[0], y=1, training_frame=frame) model.show()
def cv_cars_dw():
    """Exercise the cross-validation options of the Deep Water estimator.

    Returns immediately when Deep Water is not available. On the cars data
    set this checks basic ``nfolds`` cross-validation, that two "Random"
    fold runs differ, a user-supplied fold column, the boundary cases
    (leave-one-out, ``nfolds=0``, cross-validation together with a
    validation frame), and that invalid ``nfolds`` settings make the
    model build fail with ``EnvironmentError``.

    Two defects of the earlier version are fixed:

    * the "models must differ" check used ``assert False`` inside a
      ``try`` guarded by ``except AssertionError``, so that assertion was
      swallowed and the check could never fail;
    * ``dl2`` was compared against ``dl1`` without having been trained.

    Raises:
        AssertionError: when one of the expectations above is not met.
    """
    if not H2ODeepWaterEstimator.available():
        return

    # read in the dataset and construct training set (and validation set)
    cars = h2o.import_file(path=pyunit_utils.locate("smalldata/junit/cars_20mpg.csv"))

    # choose the type model-building exercise (multinomial classification or
    # regression). 0:regression, 1:binomial, 2:multinomial
    problem = random.sample(list(range(2)), 1)[0] + 1  # only do classification

    # pick the predictors and the correct response column
    predictors = ["displacement", "power", "weight", "acceleration", "year"]
    if problem == 1:
        response_col = "economy_20mpg"
        cars[response_col] = cars[response_col].asfactor()
    elif problem == 2:
        response_col = "cylinders"
        cars[response_col] = cars[response_col].asfactor()
    else:
        response_col = "economy"

    print("Response column: {0}".format(response_col))

    ## cross-validation
    # 1. basic
    dl = H2ODeepWaterEstimator(nfolds=random.randint(3, 10), fold_assignment="Modulo", hidden=[20, 20], epochs=10)
    dl.train(x=predictors, y=response_col, training_frame=cars)

    # 2. check that cv metrics are different over repeated "Random" runs
    nfolds = random.randint(3, 10)
    dl1 = H2ODeepWaterEstimator(nfolds=nfolds, fold_assignment="Random", hidden=[20, 20], epochs=10)
    dl1.train(x=predictors, y=response_col, training_frame=cars)
    dl2 = H2ODeepWaterEstimator(nfolds=nfolds, fold_assignment="Random", hidden=[20, 20], epochs=10)
    dl2.train(x=predictors, y=response_col, training_frame=cars)
    try:
        pyunit_utils.check_models(dl1, dl2, True)
    except AssertionError:
        pass  # expected: the two runs are not equivalent
    else:
        # Raised outside the try so that it cannot be swallowed.
        raise AssertionError("Expected models to be different over repeated Random runs")

    # 3. folds_column
    num_folds = random.randint(2, 5)
    fold_assignments = h2o.H2OFrame([[random.randint(0, num_folds - 1)] for _ in range(cars.nrow)])
    fold_assignments.set_names(["fold_assignments"])
    cars = cars.cbind(fold_assignments)
    dl = H2ODeepWaterEstimator(keep_cross_validation_predictions=True, hidden=[20, 20], epochs=10)
    dl.train(x=predictors, y=response_col, training_frame=cars, fold_column="fold_assignments")
    num_cv_models = len(dl._model_json['output']['cross_validation_models'])
    assert num_cv_models == num_folds, "Expected {0} cross-validation models, but got " \
                                       "{1}".format(num_folds, num_cv_models)
    # The fetched models / predictions are not inspected further.
    cv_model1 = h2o.get_model(dl._model_json['output']['cross_validation_models'][0]['name'])
    cv_model2 = h2o.get_model(dl._model_json['output']['cross_validation_models'][1]['name'])

    # 4. keep_cross_validation_predictions
    cv_predictions = dl1._model_json['output']['cross_validation_predictions']

    ## boundary cases
    # 1. nfolds = number of observations (leave-one-out cross-validation)
    dl = H2ODeepWaterEstimator(nfolds=cars.nrow, fold_assignment="Modulo", hidden=[20, 20], epochs=10)
    dl.train(x=predictors, y=response_col, training_frame=cars)

    # 2. nfolds = 0
    dl = H2ODeepWaterEstimator(nfolds=0, hidden=[20, 20], epochs=10)
    dl.train(x=predictors, y=response_col, training_frame=cars)

    # 3. cross-validation and regular validation attempted
    dl = H2ODeepWaterEstimator(nfolds=random.randint(3, 10), hidden=[20, 20], epochs=10)
    dl.train(x=predictors, y=response_col, training_frame=cars, validation_frame=cars)

    ## error cases
    # 1. nfolds == 1 or < 0
    try:
        dl = H2ODeepWaterEstimator(nfolds=random.sample([-1, 1], 1)[0], hidden=[20, 20], epochs=10)
        dl.train(x=predictors, y=response_col, training_frame=cars)
    except EnvironmentError:
        pass  # expected failure
    else:
        raise AssertionError("Expected model-build to fail when nfolds is 1 or < 0")

    # 2. more folds than observations
    try:
        dl = H2ODeepWaterEstimator(nfolds=cars.nrow + 1, fold_assignment="Modulo", hidden=[20, 20], epochs=10)
        dl.train(x=predictors, y=response_col, training_frame=cars)
    except EnvironmentError:
        pass  # expected failure
    else:
        raise AssertionError("Expected model-build to fail when nfolds > nobs")

    # 3. fold_column and nfolds both specified
    try:
        dl = H2ODeepWaterEstimator(nfolds=3, hidden=[20, 20], epochs=10)
        dl.train(x=predictors, y=response_col, fold_column="fold_assignments", training_frame=cars)
    except EnvironmentError:
        pass  # expected failure
    else:
        raise AssertionError("Expected model-build to fail when fold_column and nfolds both specified")
def algo_max_runtime_secs():
    '''
    Pyunit test checking that max_runtime_secs limits the training time of
    the H2O algorithms exercised below. See PUBDEV-4702.

    Each algorithm is handed to grabRuntimeInfo() together with its data and
    its objects are released with cleanUp(). The process exits with status 1
    when sum(model_within_max_runtime) is greater than zero.
    '''
    global model_within_max_runtime
    global err_bound
    seed = 12345

    # deeplearning
    data = h2o.import_file(path=pyunit_utils.locate(
        "smalldata/gridsearch/gaussian_training1_set.csv"))
    response_idx = data.ncol - 1
    predictor_idx = list(range(response_idx))
    estimator = H2ODeepLearningEstimator(distribution='gaussian',
                                         seed=seed,
                                         hidden=[10, 10, 10])
    grabRuntimeInfo(err_bound, 2.0, estimator, data, predictor_idx,
                    response_idx)
    cleanUp([data, estimator])

    # stack ensemble, stacking part is not iterative
    print(
        "******************** Skip testing stack ensemble.  Not an iterative algo."
    )

    # GBM run
    data = h2o.import_file(path=pyunit_utils.locate(
        "smalldata/gridsearch/multinomial_training1_set.csv"))
    response_idx = data.ncol - 1
    predictor_idx = list(range(response_idx))
    data[response_idx] = data[response_idx].round().asfactor()
    estimator = H2OGradientBoostingEstimator(distribution="multinomial",
                                             seed=seed)
    grabRuntimeInfo(err_bound, 2.0, estimator, data, predictor_idx,
                    response_idx)
    cleanUp([estimator])

    # GLM run (same multinomial frame as the GBM run)
    estimator = H2OGeneralizedLinearEstimator(family='multinomial', seed=seed)
    grabRuntimeInfo(err_bound, 2.0, estimator, data, predictor_idx,
                    response_idx)
    cleanUp([estimator])

    # naivebayes, not iterative
    print(
        "******************** Skip testing Naives Bayes.  Not an iterative algo."
    )

    # random forest (same frame again; no response index is passed here)
    estimator = H2ORandomForestEstimator(ntrees=100, score_tree_interval=0)
    grabRuntimeInfo(err_bound, 2.0, estimator, data, predictor_idx)
    cleanUp([estimator, data])

    # deepwater
    if H2ODeepWaterEstimator.available():
        data = h2o.import_file(
            path=pyunit_utils.locate("smalldata/gbm_test/ecology_model.csv"))
        data = data.drop('Site')
        data['Angaus'] = data['Angaus'].asfactor()
        response_idx = "Angaus"
        predictor_idx = list(range(1, data.ncol))
        estimator = H2ODeepWaterEstimator(
            epochs=50,
            hidden=[4096, 4096, 4096],
            hidden_dropout_ratios=[0.2, 0.2, 0.2])
        grabRuntimeInfo(err_bound, 2.0, estimator, data, predictor_idx,
                        response_idx)
        cleanUp([data, estimator])

    # GLRM, do not make sense to stop in the middle of an iteration
    data = h2o.import_file(
        path=pyunit_utils.locate("smalldata/gridsearch/glrmdata1000x25.csv"))
    predictor_idx = list(range(data.ncol))
    estimator = H2OGeneralizedLowRankEstimator(k=10,
                                               loss="Quadratic",
                                               gamma_x=0.3,
                                               gamma_y=0.3,
                                               transform="STANDARDIZE")
    grabRuntimeInfo(err_bound, 2.0, estimator, data, predictor_idx)
    cleanUp([data, estimator])

    # PCA
    data = h2o.import_file(
        path=pyunit_utils.locate("smalldata/gridsearch/pca1000by25.csv"))
    predictor_idx = list(range(data.ncol))
    estimator = H2OPCA(k=10,
                       transform="STANDARDIZE",
                       pca_method="Power",
                       compute_metrics=True)
    grabRuntimeInfo(err_bound * 3, 1.2, estimator, data, predictor_idx)
    cleanUp([data, estimator])

    # kmeans
    data = h2o.import_file(path=pyunit_utils.locate(
        "smalldata/gridsearch/kmeans_8_centers_3_coords.csv"))
    predictor_idx = list(range(data.ncol))
    estimator = H2OKMeansEstimator(k=10)
    grabRuntimeInfo(err_bound * 2, 2.0, estimator, data, predictor_idx)
    cleanUp([data, estimator])

    # word2vec
    text = h2o.import_file(pyunit_utils.locate("bigdata/laptop/text8.gz"),
                           header=1,
                           col_types=["string"])
    text_subset = text[0:170000, 0]
    w2v_estimator = H2OWord2vecEstimator()
    grabRuntimeInfo(err_bound, 2.0, w2v_estimator, text_subset, [], 0)
    cleanUp([text, text_subset, w2v_estimator])

    over_limit = sum(model_within_max_runtime)
    if over_limit > 0:
        sys.exit(1)
import h2o
from h2o.estimators.deepwater import H2ODeepWaterEstimator

# Start or connect to H2O
h2o.init()

# Import data and transform data
train = h2o.import_file("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv")

# Build model: built-in LeNet on 28x28 images with 3 channels
lenet_settings = dict(
    epochs=10,
    network="lenet",
    problem_type="image",
    image_shape=[28, 28],
    channels=3,
)
model = H2ODeepWaterEstimator(**lenet_settings)
# Column 0 is the predictor, column 1 the response.
model.train(x=[0], y=1, training_frame=train)

# Evaluate model
model.show()
# Fully connected layer 1 flatten = mx.symbol.Flatten(data=pool2) fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=500) act3 = mx.symbol.Activation(data=fc1, act_type="tanh") # Fully connected layer 2 fc2 = mx.symbol.FullyConnected(data=act3, num_hidden=500) out = mx.symbol.SoftmaxOutput(data=fc2, name="softmax") return out sym_my_lenet = lenet(num_classes) network_def_path = project_path + "/models/sym_my_lenet.json" sym_my_lenet.save(network_def_path) # Build Deep Water MXNet Model from h2o.estimators.deepwater import H2ODeepWaterEstimator model_mnist_mylenet_mx = H2ODeepWaterEstimator( epochs=80, network_definition_file=network_def_path, image_shape=[28, 28], channels=1, model_id="model_mnist_mylenet_mx") model_mnist_mylenet_mx.train(x=["uri"], y="label", training_frame=mnist_training, validation_frame=mnist_testing) model_mnist_mylenet_mx.show()
#http://data.dmlc.ml/mxnet/models/imagenet/inception-bn_old.tar.gz#
import h2o
from h2o.estimators.deepwater import H2ODeepWaterEstimator

# Start or connect to H2O
h2o.init(nthreads=-1, strict_version_check=False)

# Import data and transform data
train = h2o.import_file("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv")

# Load network: epochs=0, so train() only loads the supplied definition,
# parameters and mean image.
network_settings = dict(
    epochs=0,
    mini_batch_size=32,
    network="user",
    network_definition_file="Inception_BN-symbol.json",
    network_parameters_file="Inception_BN-0039.params",
    mean_image_file="mean_224.nd",
    image_shape=[224, 224],
    channels=3,
)
network_model = H2ODeepWaterEstimator(**network_settings)
network_model.train(x=[0], y=1, training_frame=train)

# Extract deep features
extracted_features = network_model.deepfeatures(train, "global_pool_output")

# Separate the records into references and queries
references = extracted_features[5:, :]
queries = extracted_features[:3, :]

# Compute similarity
import h2o
from h2o.estimators.deepwater import H2ODeepWaterEstimator

# Start or connect to H2O
h2o.init()

# Import data and transform data
train = h2o.import_file("bigdata/laptop/mnist/train.csv.gz")

# Specify a subset of features to include in the model
features = list(range(0, 784))
target = 784
train[target] = train[target].asfactor()

# Build model: the settings are collected group by group, then applied at once
model_settings = dict(epochs=100,
                      activation="Rectifier",
                      hidden=[200, 200],
                      ignore_const_cols=False,
                      mini_batch_size=256)
# dropout
model_settings.update(input_dropout_ratio=0.1,
                      hidden_dropout_ratios=[0.5, 0.5])
# early stopping
model_settings.update(stopping_rounds=3,
                      stopping_tolerance=0.05,
                      stopping_metric="misclassification")
# scoring
model_settings.update(score_interval=2,
                      score_duty_cycle=0.5,
                      score_training_samples=1000,
                      score_validation_samples=1000)
# cross-validation, device and seed
model_settings.update(nfolds=5, gpu=True, seed=1234)

model = H2ODeepWaterEstimator(**model_settings)
model.train(x=features, y=target, training_frame=train)

# Evaluate model
model.show()
print(model.scoring_history())
nfolds=nfolds, ignore_const_cols=False, keep_cross_validation_predictions=True, fold_assignment="Modulo") glm_model.train(x=features, y=target, training_frame=train, model_id="glm_model") glm_model.show() # Deep Water Model dw_model = H2ODeepWaterEstimator(epochs=3, network="lenet", ignore_const_cols=False, image_shape=[28, 28], channels=1, standardize=False, seed=1234, nfolds=nfolds, keep_cross_validation_predictions=True, fold_assignment="Modulo") dw_model.train(x=features, y=target, training_frame=train, model_id="dw_model") dw_model.show() # Stacked Ensemble stack_all = H2OStackedEnsembleEstimator( base_models=[gbm_model.model_id, glm_model.model_id, dw_model.model_id]) stack_all.train(x=features, y=target, training_frame=train, validation_frame=valid, model_id="stack_all")
# Connect or Start H2O
h2o.init()

# Import Data
mnist_training = h2o.import_file(project_path+"/data/mnist-training.csv")
mnist_testing = h2o.import_file(project_path+"/data/mnist-testing.csv")

# The "label" column is the response; make it categorical in both frames.
mnist_training["label"] = mnist_training["label"].asfactor()
mnist_testing["label"] = mnist_testing["label"].asfactor()

# Explore Data
print(mnist_training.head())

# Build Deep Water Models
from h2o.estimators.deepwater import H2ODeepWaterEstimator

# Arguments shared by every train() call below.
shared_train_args = dict(x=["uri"],
                         y="label",
                         training_frame=mnist_training,
                         validation_frame=mnist_testing)

# LeNet without an explicit backend argument
model_mnist_lenet_mx = H2ODeepWaterEstimator(epochs=80, network="lenet")
model_mnist_lenet_mx.train(model_id="model_mnist_lenet_mx", **shared_train_args)
model_mnist_lenet_mx.show()

# LeNet on the TensorFlow backend
model_mnist_lenet_tf = H2ODeepWaterEstimator(epochs=80, network="lenet", backend="tensorflow")
model_mnist_lenet_tf.train(model_id="model_mnist_lenet_tf", **shared_train_args)
model_mnist_lenet_tf.show()

#model_mnist_lenet_caffe = H2ODeepWaterEstimator(epochs=80, network="lenet", backend="caffe")
#model_mnist_lenet_caffe.train(x=["uri"], y="label", training_frame=mnist_training, validation_frame=mnist_testing, model_id="model_mnist_lenet_caffe")
#model_mnist_lenet_caffe.show()

# Save H2O Model
models_dir = project_path+"/models/"
h2o.save_model(model=model_mnist_lenet_mx, path=models_dir, force=True)
h2o.save_model(model=model_mnist_lenet_tf, path=models_dir, force=True)
#h2o.save_model(model=model_mnist_lenet_caffe, path=project_path+"/models/", force=True)
# Connect or Start H2O
h2o.init()

# Import Data
training_csv = project_path + "/data/mnist-training.csv"
testing_csv = project_path + "/data/mnist-testing.csv"
mnist_training = h2o.import_file(training_csv)
mnist_testing = h2o.import_file(testing_csv)

# The "label" column is the response; make it categorical in both frames.
mnist_training["label"] = mnist_training["label"].asfactor()
mnist_testing["label"] = mnist_testing["label"].asfactor()

# Explore Data
print(mnist_training.head())

# Build Deep Water Model
from h2o.estimators.deepwater import H2ODeepWaterEstimator

model_mnist_lenet_mx = H2ODeepWaterEstimator(epochs=80, network="lenet")
model_mnist_lenet_mx.train(
    x=["uri"],
    y="label",
    training_frame=mnist_training,
    validation_frame=mnist_testing,
    model_id="model_mnist_lenet_mx",
)
model_mnist_lenet_mx.show()

# Extract Deep Features from Model (layer "flatten0_output", testing frame)
extracted_features = model_mnist_lenet_mx.deepfeatures(mnist_testing, "flatten0_output")
print(extracted_features.dim)
print(extracted_features[0:4, :])