def train_and_evaluate(args):
    show_lib_version()
    train_filename = args['train_filename']
    bucket_name = args['bucket_name']
    data_loc = os.path.join('gs://', bucket_name, 'data', train_filename)
    # data_loc = 'gs://ancient-snow-224803-ff/data/train.dense'
    print('data_loc:{}, train_filename:{}'.format(data_loc, train_filename))

    # gsutil outputs everything to stderr, so divert it to stdout.
    subprocess.check_call(['gsutil', 'cp', data_loc, train_filename],
                          stderr=sys.stdout)

    config = {"params": dict(n_estimators=50)}
    x, y = load_data(train_filename)
    clf = model.build_estimator(config)
    clf.fit(x, y)

    model_name = 'model.joblib'
    joblib.dump(clf, model_name, compress=3)
    print("Saved model to {0}".format(model_name))
    upload_to_gs(model_name, bucket_name)

    try:
        # Log the installed packages for debugging.
        print(subprocess.check_output(['pip', 'freeze'], stderr=sys.stderr))
    except Exception:
        pass

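The `upload_to_gs` helper called above is not defined in this snippet. A minimal sketch, assuming it mirrors the gsutil-based download a few lines earlier (the `model/` destination prefix is an assumption, not taken from the original code):

def upload_to_gs(filename, bucket_name):
    """Hypothetical helper: copies a local file to gs://<bucket_name>/model/."""
    destination = os.path.join('gs://', bucket_name, 'model', filename)
    subprocess.check_call(['gsutil', 'cp', filename, destination],
                          stderr=sys.stdout)
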
def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""
    train_input = lambda: model.input_fn(
        filename=os.path.join(hparams.data_dir, 'train.tfrecords'),
        batch_size=hparams.train_batch_size)
    eval_input = lambda: model.input_fn(
        filename=os.path.join(hparams.data_dir, 'test.tfrecords'),
        batch_size=hparams.eval_batch_size)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter('cnn', model.serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='cnn-eval')

    estimator = model.build_estimator(model_dir=hparams.job_dir)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""
    train_input = lambda: model.input_fn(hparams.train_files,
                                         num_epochs=hparams.num_epochs,
                                         batch_size=hparams.train_batch_size)
    # Don't shuffle evaluation data
    eval_input = lambda: model.input_fn(hparams.eval_files,
                                        batch_size=hparams.eval_batch_size,
                                        shuffle=False)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'jimini', model.SERVING_FUNCTIONS[hparams.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='jimini-eval')

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)
    print('model dir {}'.format(run_config.model_dir))

    estimator = model.build_estimator(
        embedding_size=hparams.embedding_size,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
            for i in range(hparams.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def run(args):
    """Runs tensorflow model training.

    Args:
      args: Arguments parsed at program execution.
    """
    estimator = model.build_estimator(
        output_dir=args.output_dir,
        first_layer_size=args.first_layer_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        learning_rate=args.learning_rate,
        save_checkpoints_steps=args.save_checkpoints_steps)

    train_input_fn = input_fn_utils.read_dataset(
        input_dir=args.input_dir,
        mode=tf.contrib.learn.ModeKeys.TRAIN,
        batch_size=args.batch_size)
    eval_input_fn = input_fn_utils.read_dataset(
        input_dir=args.input_dir,
        mode=tf.contrib.learn.ModeKeys.EVAL,
        batch_size=args.batch_size)
    serving_input_fn = input_fn_utils.get_serving_input_fn(args.input_dir)

    train_spec = tf.estimator.TrainSpec(
        input_fn=train_input_fn, hooks=[], max_steps=args.max_steps)
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=eval_input_fn, hooks=[], exporters=exporter)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def train_and_evaluate(args):
    """Run the training and evaluate using the high level API."""
    train_input = lambda: model.input_fn(args.train_files,
                                         num_epochs=args.num_epochs,
                                         batch_size=args.train_batch_size)
    # Don't shuffle evaluation data
    eval_input = lambda: model.input_fn(args.eval_files,
                                        batch_size=args.eval_batch_size,
                                        shuffle=False)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=args.train_steps)

    exporter = tf.estimator.FinalExporter(
        'census', model.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=args.eval_steps,
                                      exporters=[exporter],
                                      name='census-eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var())
    run_config = run_config.replace(model_dir=args.job_dir)
    print('Model dir %s' % run_config.model_dir)

    estimator = model.build_estimator(
        embedding_size=args.embedding_size,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2, int(args.first_layer_size * args.scale_factor**i))
            for i in range(args.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

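For context, a census-style trainer like `train_and_evaluate(args)` above is typically driven from an argparse entry point. A minimal sketch, assuming flag names that map onto the attributes used above (names and defaults here are illustrative, not taken from the original task module):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--train-files', nargs='+', required=True)
    parser.add_argument('--eval-files', nargs='+', required=True)
    parser.add_argument('--job-dir', required=True)
    parser.add_argument('--num-epochs', type=int, default=None)
    parser.add_argument('--train-batch-size', type=int, default=40)
    parser.add_argument('--eval-batch-size', type=int, default=40)
    parser.add_argument('--train-steps', type=int, default=1000)
    parser.add_argument('--eval-steps', type=int, default=100)
    parser.add_argument('--export-format', default='JSON')
    parser.add_argument('--embedding-size', type=int, default=8)
    parser.add_argument('--first-layer-size', type=int, default=100)
    parser.add_argument('--scale-factor', type=float, default=0.7)
    parser.add_argument('--num-layers', type=int, default=4)
    args = parser.parse_args()
    # argparse converts dashes to underscores, so args.train_files etc. exist.
    train_and_evaluate(args)
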
def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""
    train_input = lambda: model.input_fn(hparams.train_files,
                                         num_epochs=hparams.num_epochs,
                                         batch_size=hparams.train_batch_size)
    # Don't shuffle evaluation data
    eval_input = lambda: model.input_fn(hparams.eval_files,
                                        batch_size=hparams.eval_batch_size,
                                        shuffle=False)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'airline', model.SERVING_FUNCTIONS[hparams.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='airline-eval')

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)
    print('model dir {}'.format(run_config.model_dir))

    estimator = model.build_estimator(model=hparams.model, config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def train_and_evaluate(flags):
    """Runs model training and evaluation using TF Estimator API"""
    # Get TF transform metadata generated during preprocessing
    tf_transform_output = tft.TFTransformOutput(flags.input_dir)

    # Define training spec
    feature_spec = tf_transform_output.transformed_feature_spec()
    train_input_fn = functools.partial(input_util.input_fn,
                                       flags.input_dir,
                                       tf.estimator.ModeKeys.TRAIN,
                                       flags.train_batch_size,
                                       flags.num_epochs,
                                       label_name=metadata.LABEL_COLUMN,
                                       feature_spec=feature_spec)
    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=flags.train_steps)

    # Define eval spec
    eval_input_fn = functools.partial(input_util.input_fn,
                                      flags.input_dir,
                                      tf.estimator.ModeKeys.EVAL,
                                      flags.eval_batch_size,
                                      num_epochs=1,
                                      label_name=metadata.LABEL_COLUMN,
                                      feature_spec=feature_spec)
    exporter = tf.estimator.FinalExporter(
        "export",
        functools.partial(input_util.tfrecord_serving_input_fn,
                          feature_spec,
                          label_name=metadata.LABEL_COLUMN))
    eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                      steps=flags.eval_steps,
                                      start_delay_secs=flags.eval_start_secs,
                                      exporters=[exporter],
                                      name='MRI-eval')

    steps_per_run_train = 7943 // (flags.train_batch_size * 4)
    steps_per_run_eval = 964 // (flags.eval_batch_size * 4)

    # Additional configs required for using TPUs
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(flags.tpu)
    tpu_config = tf.contrib.tpu.TPUConfig(
        num_shards=8,  # using Cloud TPU v2-8
        iterations_per_loop=200)

    # Define training config
    run_config = tf.contrib.tpu.RunConfig(cluster=tpu_cluster_resolver,
                                          model_dir=flags.job_dir,
                                          tpu_config=tpu_config,
                                          save_checkpoints_steps=200,
                                          save_summary_steps=100)

    # Build the estimator
    feature_columns = model.get_feature_columns(
        tf_transform_output, exclude_columns=metadata.NON_FEATURE_COLUMNS)
    estimator = model.build_estimator(run_config, flags, feature_columns)

    # Run training and evaluation
    # tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    estimator.train(train_input_fn)

def main(hparams):
    """Run the training and evaluation using the high level API."""
    with tf.gfile.GFile(hparams.train_file, "r") as f:
        train_df = pd.read_csv(f, compression='gzip')
    with tf.gfile.GFile(hparams.test_file, "r") as f:
        test_df = pd.read_csv(f, compression='gzip')
    tf.logging.info('Done fetching training and test datasets.')

    trn_input = tf.estimator.inputs.pandas_input_fn(
        x=train_df,
        y=train_df["polarity"],
        num_epochs=None,
        shuffle=True,
        batch_size=hparams.batch_size,
        num_threads=4,
        queue_capacity=hparams.batch_size * 5)
    train_spec = tf.estimator.TrainSpec(trn_input, max_steps=hparams.train_steps)

    eval_input = tf.estimator.inputs.pandas_input_fn(
        x=test_df,
        y=test_df["polarity"],
        num_epochs=1,
        shuffle=False,
        batch_size=hparams.batch_size,
        num_threads=4,
        queue_capacity=hparams.batch_size * 5)

    # Construct our JSON serving function for predictions via API.
    exporter = tf.estimator.FinalExporter('model', model.build_serving_fn())
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        throttle_secs=hparams.eval_secs,
        steps=hparams.eval_steps,
        exporters=[exporter],
        start_delay_secs=20)

    run_config = tf.estimator.RunConfig(model_dir=hparams.job_dir)

    # Construct layers sizes by halving each layer (integer division keeps
    # the unit counts ints, as the estimator expects).
    hidden_units = []
    for i in range(hparams.num_layers):
        units = hparams.first_layer_size // (2**i)
        hidden_units.append(units)

    estimator = model.build_estimator(
        config=run_config,
        hidden_units=hidden_units,
        learning_rate=hparams.learning_rate,
        dropout=hparams.dropout,
        optimizer=hparams.optimizer,
        hub_module=HUB_MODULES.get(hparams.hub_module),
        train_hub=hparams.train_hub_module)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def train_and_maybe_evaluate(hparams):
    """Run the training and evaluate using the high level API.

    Args:
      hparams: Holds hyperparameters used to train the model as name/value pairs.

    Returns:
      The estimator that was used for training (and maybe eval)
    """
    schema = taxi.read_schema(hparams.schema_file)
    tf_transform_output = tft.TFTransformOutput(hparams.tf_transform_dir)

    train_input = lambda: model.input_fn(
        hparams.train_files, tf_transform_output, batch_size=TRAIN_BATCH_SIZE)
    eval_input = lambda: model.input_fn(
        hparams.eval_files, tf_transform_output, batch_size=EVAL_BATCH_SIZE)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=hparams.train_steps)

    serving_receiver_fn = lambda: model.example_serving_receiver_fn(
        tf_transform_output, schema)

    exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=hparams.eval_steps,
        exporters=[exporter],
        name='chicago-taxi-eval')

    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=999, keep_checkpoint_max=1)

    serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
    run_config = run_config.replace(model_dir=serving_model_dir)

    estimator = model.build_estimator(
        tf_transform_output,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
            for i in range(NUM_DNN_LAYERS)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    return estimator

def train_and_maybe_evaluate(hparams):
    """Run the training and evaluate using the high level API.

    Args:
      hparams: Holds hyperparameters used to train the model as name/value pairs.

    Returns:
      The estimator that was used for training (and maybe eval)
    """
    schema = taxi.read_schema(hparams.schema_file)

    train_input = lambda: model.input_fn(
        hparams.train_files, hparams.tf_transform_dir, batch_size=TRAIN_BATCH_SIZE)
    eval_input = lambda: model.input_fn(
        hparams.eval_files, hparams.tf_transform_dir, batch_size=EVAL_BATCH_SIZE)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=hparams.train_steps)

    serving_receiver_fn = lambda: model.example_serving_receiver_fn(
        hparams.tf_transform_dir, schema)

    exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=hparams.eval_steps,
        exporters=[exporter],
        name='chicago-taxi-eval')

    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=999, keep_checkpoint_max=1)

    serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
    run_config = run_config.replace(model_dir=serving_model_dir)

    estimator = model.build_estimator(
        hparams.tf_transform_dir,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
            for i in range(NUM_DNN_LAYERS)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    return estimator

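The module-level constants referenced in the two Chicago-taxi snippets above (TRAIN_BATCH_SIZE, EVAL_BATCH_SIZE, SERVING_MODEL_DIR, FIRST_DNN_LAYER_SIZE, DNN_DECAY_FACTOR, NUM_DNN_LAYERS) are assumed to live at module scope next to these functions. A sketch with purely illustrative values:

# Illustrative values only; the real trainer module may use different numbers.
TRAIN_BATCH_SIZE = 40
EVAL_BATCH_SIZE = 40
SERVING_MODEL_DIR = 'serving_model_dir'
FIRST_DNN_LAYER_SIZE = 100
NUM_DNN_LAYERS = 4
DNN_DECAY_FACTOR = 0.7
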
def train_and_evaluate(args):
    """Run the training and evaluate using the high level API."""

    def train_input():
        """Input function returning batches from the training
        data set from training.
        """
        return input_module.input_fn(
            args.train_files,
            num_epochs=args.num_epochs,
            batch_size=args.train_batch_size,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    def eval_input():
        """Input function returning the entire validation data
        set for evaluation. Shuffling is not required.
        """
        return input_module.input_fn(
            args.eval_files,
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=args.train_steps)

    # exporter = tf.estimator.FinalExporter(
    #     'census', input_module.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=args.eval_steps,
        # exporters=[exporter],
        name='census-eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var(args))
    # run_config = run_config.replace(model_dir=args.job_dir)
    # print('Model dir %s' % run_config.model_dir)

    estimator = model.build_estimator(
        embedding_size=args.embedding_size,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2, int(args.first_layer_size * args.scale_factor**i))
            for i in range(args.num_layers)
        ],
        config=run_config)

    start_time = time.time()
    estimator.train(train_input, max_steps=args.train_steps)
    end_time = time.time()
    print("--------------")
    print("--------------Running time: ", end_time - start_time)
    print(end_time - start_time)

def _experiment_fn(output_dir):
    return Experiment(
        model.build_estimator(output_dir),
        train_input_fn=model.get_input_fn(
            filename=os.path.join(data_dir, 'GOOG_series_train.csv'),
            batch_size=train_batch_size),
        eval_input_fn=model.get_input_fn(
            filename=os.path.join(data_dir, 'GOOG_series_validation.csv'),
            batch_size=eval_batch_size),
        train_steps=train_steps,
        eval_steps=eval_steps,
        **experiment_args)

def train_and_evaluate(args):
    """Run the training and evaluate using the high level API."""

    def train_input():
        """Input function returning batches from the training
        data set from training.
        """
        return input_module.input_fn(
            args.train_files,
            num_epochs=args.num_epochs,
            batch_size=args.train_batch_size,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    def eval_input():
        """Input function returning the entire validation data
        set for evaluation. Shuffling is not required.
        """
        return input_module.input_fn(
            args.eval_files,
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=args.train_steps)

    exporter = tf.estimator.FinalExporter(
        'census', input_module.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=args.eval_steps,
        exporters=[exporter],
        name='census-eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var())
    run_config = run_config.replace(model_dir=args.job_dir)
    print('Model dir %s' % run_config.model_dir)

    estimator = model.build_estimator(
        embedding_size=args.embedding_size,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2, int(args.first_layer_size * args.scale_factor**i))
            for i in range(args.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def train_and_evaluate(flags):
    """Runs model training and evaluation using TF Estimator API."""
    # Get TF transform metadata generated during preprocessing
    tf_transform_output = tft.TFTransformOutput(flags.input_dir)
    feature_spec = tf_transform_output.transformed_feature_spec()

    train_input_fn = functools.partial(input_util.input_fn,
                                       input_dir=flags.input_dir,
                                       mode=tf.estimator.ModeKeys.TRAIN,
                                       batch_size=flags.train_batch_size,
                                       num_epochs=flags.num_epochs,
                                       label_name=metadata.LABEL_COLUMN,
                                       feature_spec=feature_spec)
    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=flags.train_steps)

    eval_input_fn = functools.partial(input_util.input_fn,
                                      input_dir=flags.input_dir,
                                      mode=tf.estimator.ModeKeys.EVAL,
                                      batch_size=flags.eval_batch_size,
                                      num_epochs=1,
                                      label_name=metadata.LABEL_COLUMN,
                                      feature_spec=feature_spec)
    exporter = tf.estimator.FinalExporter(
        'export',
        functools.partial(input_util.tfrecord_serving_input_fn,
                          feature_spec=feature_spec,
                          label_name=metadata.LABEL_COLUMN))
    eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                      steps=flags.eval_steps,
                                      start_delay_secs=flags.eval_start_secs,
                                      exporters=[exporter],
                                      name='churn-eval')

    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=flags.checkpoint_steps,
        tf_random_seed=metadata.SEED,
        model_dir=flags.job_dir)

    feature_columns = model.get_feature_columns(
        tf_transform_output, exclude_columns=metadata.NON_FEATURE_COLUMNS)
    num_intervals = metadata.NUM_INTERVALS
    estimator = model.build_estimator(run_config, flags, feature_columns,
                                      num_intervals)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def run_it(args):
    dp = model.data_pipeline(args.feature_file, args.label_file)
    train_input = lambda: dp.training()
    eval_input = lambda: dp.testing()

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=10000)
    eval_spec = tf.estimator.EvalSpec(eval_input, steps=999)

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=args.model_dir)

    estimator = model.build_estimator(config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def train_and_evaluate(args, l1, l2, lr):

    def train_input():
        return input_module.input_fn(
            args.training_file,
            num_epochs=args.num_epochs,
            shuffle=True,
            batch_size=args.train_batch_size)

    def eval_input():
        return input_module.input_fn(
            args.eval_file, num_epochs=100, shuffle=False, batch_size=1)

    def evaluate_model(estimator):
        eval_inpf = functools.partial(
            input_module.input_fn,
            args.eval_file,
            num_epochs=1,
            shuffle=False,
            batch_size=1)
        results = estimator.evaluate(eval_inpf)
        for key, value in sorted(results.items()):
            print('%s: %0.2f' % (key, value))

    def predictions_to_csv(estimator, predictions_on='training'):
        if predictions_on == 'training':
            features = functools.partial(
                input_module.input_fn,
                args.training_file,
                num_epochs=1,
                shuffle=False,
                batch_size=1,
                include_labels=False)
        predictions = []
        preds = estimator.predict(input_fn=features)
        for pred in preds:
            predictions.append(np.argmax(pred['probabilities']))
        predictions = np.asarray(predictions).reshape(len(predictions), 1)
        if predictions_on == 'training':
            features = pd.read_csv(args.training_file).values
        results = np.concatenate((features, predictions), axis=1)
        pd.DataFrame(results, columns=input_module.PREDICTIONS_COLUMNS).to_csv(
            os.getcwd() + '/data/%s-predictions.csv' % predictions_on,
            index=False)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=args.train_steps)
    exporter = tf.estimator.FinalExporter(
        'model', input_module.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(
        eval_input, steps=100, exporters=[exporter], name='model_eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var())
    run_config = run_config.replace(model_dir=args.job_dir)
    print('Model dir %s' % run_config.model_dir)

    estimator = model.build_estimator(model_dir=args.job_dir, l1=l1, l2=l2, lr=lr)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    evaluate_model(estimator)
    predictions_to_csv(estimator)

def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""
    train_input = lambda: model.input_fn(
        hparams.train_files,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.train_batch_size)
    # Don't shuffle evaluation data
    eval_input = lambda: model.input_fn(
        hparams.eval_files,
        batch_size=hparams.eval_batch_size,
        shuffle=False)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'census', model.SERVING_FUNCTIONS[hparams.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='census-eval')

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)
    print('model dir {}'.format(run_config.model_dir))

    estimator = model.build_estimator(
        embedding_size=hparams.embedding_size,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
            for i in range(hparams.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def train_and_evaluate(args):
    """Run the training and evaluate using the high level API."""

    def train_input():
        """Input function returning batches from the training
        data set from training.
        """
        return input_module.input_fn(
            args.train_files,
            num_epochs=args.num_epochs,
            batch_size=args.train_batch_size,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    def eval_input():
        """Input function returning the entire validation data
        set for evaluation. Shuffling is not required.
        """
        return input_module.input_fn(
            args.eval_files,
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=args.train_steps)

    exporter = tf.estimator.FinalExporter(
        'stores', input_module.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=args.eval_steps,
                                      exporters=[exporter],
                                      name='stores-eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var())
    run_config = run_config.replace(model_dir=args.job_dir)
    print('Model dir %s' % run_config.model_dir)

    estimator = model.build_estimator(config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

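Several of the trainers above (census, stores, and similar) call a helper `_get_session_config_from_env_var()` that is not shown in these snippets. A plausible sketch, assuming it follows the common pattern of parsing the TF_CONFIG environment variable to set device filters for a classic parameter-server setup:

import json
import os

import tensorflow as tf


def _get_session_config_from_env_var():
    """Sketch: returns a tf.ConfigProto derived from TF_CONFIG, or None.

    Device filters below assume 'master', 'worker' and 'ps' task types; the
    real helper may differ.
    """
    tf_config = json.loads(os.environ.get('TF_CONFIG', '{}'))
    task = tf_config.get('task', {})
    if 'type' in task and 'index' in task:
        if task['type'] == 'master':
            # The master only needs to talk to itself and the ps servers.
            return tf.ConfigProto(device_filters=['/job:ps', '/job:master'])
        elif task['type'] == 'worker':
            # Each worker only needs to talk to itself and the ps servers.
            return tf.ConfigProto(
                device_filters=['/job:ps',
                                '/job:worker/task:%d' % task['index']])
    return None
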
def _experiment_fn(output_dir):
    return Experiment(
        model.build_estimator(output_dir),
        train_input_fn=model.get_input_fn(
            filename=os.path.join(data_dir, 'train.tfrecords'),
            batch_size=train_batch_size),
        eval_input_fn=model.get_input_fn(
            filename=os.path.join(data_dir, 'test.tfrecords'),
            batch_size=eval_batch_size),
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                model.serving_input_fn,
                default_output_alternative_key=None,
                exports_to_keep=1)
        ],
        train_steps=train_steps,
        eval_steps=eval_steps,
        **experiment_args)

def _experiment_fn(output_dir):
    input_fn = model.generate_csv_input_fn
    train_input = input_fn(train_data_paths,
                           num_epochs=num_epochs,
                           batch_size=train_batch_size)
    eval_input = input_fn(eval_data_paths,
                          batch_size=eval_batch_size,
                          mode=tf.contrib.learn.ModeKeys.EVAL)
    return Experiment(
        model.build_estimator(output_dir, hidden_units=hidden_units),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                model.serving_input_fn,
                default_output_alternative_key=None,
                exports_to_keep=1)
        ],
        eval_metrics=model.get_eval_metrics(),
        # min_eval_frequency=1000,  # change this to speed up training on large datasets
        **experiment_args)

def main(hparams):
    """Run the training and evaluate using the high level API."""
    trn_input = lambda: model.input_fn(
        hparams.train_files, batch_size=hparams.train_batch_size)
    train_spec = tf.estimator.TrainSpec(trn_input, max_steps=hparams.train_steps)

    eval_input = lambda: model.input_fn(
        hparams.eval_files, batch_size=hparams.eval_batch_size)

    # Construct our JSON serving function for Online Predictions using GCP.
    exporter = tf.estimator.FinalExporter('model', model.build_serving_fn())
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        throttle_secs=hparams.eval_secs,
        steps=hparams.eval_steps,
        exporters=[exporter])

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)

    # Construct layers sizes with exponential decay
    hidden_units = [
        max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
        for i in range(hparams.num_layers)
    ]

    estimator = model.build_estimator(
        config=run_config,
        hidden_units=hidden_units,
        learning_rate=hparams.learning_rate,
        dropout=hparams.dropout,
        embedding_vocab_file=hparams.cpc_embedding_vocab_file,
        embedding_dim=hparams.cpc_embedding_dim)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def execute(hypes, metadata, job_directory):
    data_directory = 'working_dir/data/%s' % (hypes['data_directory'])
    hypes['data'] = json.loads(
        storage.get('%s/config.json' % data_directory).decode('utf-8'))
    storage.write(json.dumps(hypes, indent=2, sort_keys=True),
                  "%s/hypes.json" % job_directory)

    estimator = model.build_estimator(hypes, metadata, job_directory)

    train_input_fn = model.get_input_fn(hypes, ModeKeys.TRAIN)
    train_steps = hypes['epochs'] * data.length(data_directory, ModeKeys.TRAIN)
    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                        max_steps=train_steps)

    eval_input_fn = model.get_input_fn(hypes, ModeKeys.EVAL)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=eval_input_fn,
        steps=hypes['eval_steps'],
        throttle_secs=hypes['eval_throttle_seconds'])

    # Run the training job
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

def train(net, input_fn, steps):
    net.train(input_fn=input_fn, steps=steps)


def predict(net, input_fn):
    return net.predict(input_fn=input_fn)


def evaluate(net, input_fn):
    return net.evaluate(input_fn=input_fn)


def hopt(net, input_fn, steps):
    pass


nn = M.build_estimator(config, **args.__dict__)

if 'train' == args.mode:
    train(nn, input_fn, args.num_steps)
elif 'eval' == args.mode:
    score = evaluate(nn, input_fn)
    print('score:', score)
elif 'predict' == args.mode:
    prediction = predict(nn, input_fn)
    for i, p in enumerate(prediction):
        print(p)
        if i > 15:
            break

def train_and_maybe_evaluate(hparams):
    """Run the training and evaluate using the high level API.

    Args:
      hparams: Holds hyperparameters used to train the model as name/value pairs.

    Returns:
      The estimator that was used for training (and maybe eval)
    """
    tf_transform_output = tft.TFTransformOutput(hparams.tf_transform_dir)
    tag = hparams.tag

    def train_input():
        return model.input_fn(hparams.train_files,
                              tf_transform_output,
                              batch_size=TRAIN_BATCH_SIZE)

    def eval_input():
        return model.input_fn(hparams.eval_files,
                              tf_transform_output,
                              batch_size=EVAL_BATCH_SIZE)

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=hparams.train_steps)

    def serving_receiver_fn():
        return model.example_serving_receiver_fn(tf_transform_output)

    exporter = tf.estimator.FinalExporter(tag, serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='{}-eval'.format(tag))

    run_config = tf.estimator.RunConfig(
        # save_checkpoints_steps=999,
        keep_checkpoint_max=1)

    # serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
    serving_model_dir = os.path.join(
        hparams.output_dir, '{}_serving'.format(hparams.first_dnn_layer_size))
    run_config = run_config.replace(model_dir=serving_model_dir)

    estimator = model.build_estimator(
        run_config,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2,
                int(hparams.first_dnn_layer_size * hparams.dnn_decay_factor**i))
            for i in range(hparams.num_dnn_layers)
        ],
        wide=hparams.wide)

    estimator = tf.estimator.add_metrics(estimator, my_metric)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    def eval_input_receiver_fn():
        return model.eval_input_receiver_fn(tf_transform_output)

    eval_model_dir = os.path.join(
        hparams.output_dir, '{}_eval'.format(hparams.first_dnn_layer_size))
    tfma.export.export_eval_savedmodel(
        estimator=estimator,
        export_dir_base=eval_model_dir,
        eval_input_receiver_fn=eval_input_receiver_fn)

    return estimator

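The `my_metric` function passed to `tf.estimator.add_metrics` above is not part of this snippet. `tf.estimator.add_metrics` accepts a function over any subset of (features, labels, predictions, config) that returns a dict of metric ops; as a sketch, assuming a binary-classification head whose predictions expose a 'logistic' key, it might look like:

import tensorflow as tf


def my_metric(labels, predictions):
    """Hypothetical extra metric; the metric actually used by this trainer is unknown."""
    return {
        'mean_predicted_score': tf.metrics.mean(predictions['logistic']),
    }
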
def train_and_evaluate(hparams):
    """Run the training and evaluate using the high level API."""

    def train_input():
        """Input function returning batches from the training
        data set from training.
        """
        return input_module.make_training_input_fn(
            hparams.tft_output_dir,
            hparams.train_filebase,
            hparams.weight,
            num_epochs=hparams.num_epochs,
            batch_size=hparams.train_batch_size,
            buffer_size=hparams.buffer_size,
            prefetch_buffer_size=hparams.prefetch_buffer_size)

    def eval_input():
        """Input function returning the entire validation data
        set for evaluation. Shuffling is not required.
        """
        return input_module.make_training_input_fn(
            hparams.tft_output_dir,
            hparams.eval_filebase,
            hparams.weight,
            shuffle=False,
            batch_size=hparams.eval_batch_size,
            buffer_size=hparams.buffer_size,
            prefetch_buffer_size=hparams.prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(train_input(), max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'model',
        input_module.make_serving_input_receiver_fn(
            hparams.tft_output_dir, hparams.schema_file))
    eval_spec = tf.estimator.EvalSpec(
        eval_input(),
        steps=hparams.eval_steps,
        exporters=[exporter],
        name='model-eval')

    run_config = tf.estimator.RunConfig(
        model_dir=os.path.join(hparams.job_dir, hparams.serving_model_dir),
        session_config=_get_session_config_from_env_var(),
        save_checkpoints_steps=999,
        keep_checkpoint_max=1)

    print('Model dir %s' % run_config.model_dir)

    estimator = model.build_estimator(
        tft_output_dir=hparams.tft_output_dir,
        embedding_size=hparams.embedding_size,
        weight=hparams.weight,
        # Construct layers sizes with exponential decay
        hidden_units=[
            max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
            for i in range(hparams.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)