def get_inputs(params):
  """Set up the data producer and return dataset/step-count metadata.

  Optionally downloads the dataset and seeds NumPy's RNG, then builds
  either a dummy (synthetic) producer or the real preprocessing pipeline.

  Args:
    params: Dict of parameters forwarded to the data pipeline.

  Returns:
    Tuple of (num_users, num_items, num_train_steps, num_eval_steps,
    producer).
  """
  if FLAGS.download_if_missing and not FLAGS.use_synthetic_data:
    movielens.download(FLAGS.dataset, FLAGS.data_dir)

  if FLAGS.seed is not None:
    np.random.seed(FLAGS.seed)

  if FLAGS.use_synthetic_data:
    producer = data_pipeline.DummyConstructor()
    num_users, num_items = data_preprocessing.DATASET_TO_NUM_USERS_AND_ITEMS[
        FLAGS.dataset]
    # Synthetic runs use a fixed number of batches for both phases.
    num_train_steps = num_eval_steps = rconst.SYNTHETIC_BATCHES_PER_EPOCH
  else:
    num_users, num_items, producer = data_preprocessing.instantiate_pipeline(
        dataset=FLAGS.dataset,
        data_dir=FLAGS.data_dir,
        params=params,
        constructor_type=FLAGS.constructor_type,
        # A fixed seed implies the pipeline should be deterministic too.
        deterministic=FLAGS.seed is not None)
    num_train_steps = producer.train_batches_per_epoch
    num_eval_steps = producer.eval_batches_per_epoch

  return num_users, num_items, num_train_steps, num_eval_steps, producer
def run_movie(flags_obj):
  """Construct all necessary functions and call run_loop.

  Args:
    flags_obj: Object containing user specified flags.
  """
  if flags_obj.download_if_missing:
    movielens.download(dataset=flags_obj.dataset, data_dir=flags_obj.data_dir)

  input_fns = movielens_dataset.construct_input_fns(
      dataset=flags_obj.dataset,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      repeat=flags_obj.epochs_between_evals)
  train_input_fn, eval_input_fn, model_column_fn = input_fns

  # NOTE(review): '{loss_prefix}' appears to be a template filled in by
  # run_loop with the estimator's loss prefix — confirm against run_loop.
  tensors_to_log = {'loss': '{loss_prefix}head/weighted_loss/value'}

  wide_deep_run_loop.run_loop(
      name="MovieLens",
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      model_column_fn=model_column_fn,
      build_estimator_fn=build_estimator,
      flags_obj=flags_obj,
      tensors_to_log=tensors_to_log,
      early_stop=False)
def prepare_raw_data(flag_obj):
  """Downloads and prepares raw data for data generation.

  Args:
    flag_obj: Object containing user specified flags.

  Returns:
    Tuple of (producer, input_metadata) where input_metadata is a dict
    describing the generated dataset and batching configuration.
  """
  movielens.download(flag_obj.dataset, flag_obj.data_dir)

  pipeline_params = {
      "train_epochs": flag_obj.num_train_epochs,
      "batch_size": flag_obj.train_prebatch_size,
      "eval_batch_size": flag_obj.eval_prebatch_size,
      "batches_per_step": 1,
      "stream_files": True,
      "num_neg": flag_obj.num_negative_samples,
  }

  num_users, num_items, producer = data_preprocessing.instantiate_pipeline(
      dataset=flag_obj.dataset,
      data_dir=flag_obj.data_dir,
      params=pipeline_params,
      constructor_type=flag_obj.constructor_type,
      epoch_dir=flag_obj.data_dir,
      generate_data_offline=True)

  # The element counts live on private producer attributes.
  # pylint: disable=protected-access
  input_metadata = {
      "num_users": num_users,
      "num_items": num_items,
      "constructor_type": flag_obj.constructor_type,
      "num_train_elements": producer._elements_in_epoch,
      "num_eval_elements": producer._eval_elements_in_epoch,
      "num_train_epochs": flag_obj.num_train_epochs,
      "train_prebatch_size": flag_obj.train_prebatch_size,
      "eval_prebatch_size": flag_obj.eval_prebatch_size,
      "num_train_steps": producer.train_batches_per_epoch,
      "num_eval_steps": producer.eval_batches_per_epoch,
  }
  # pylint: enable=protected-access

  return producer, input_metadata
def main(_):
  """Download the configured MovieLens dataset and build its input fns."""
  dataset = flags.FLAGS.dataset
  data_dir = flags.FLAGS.data_dir
  movielens.download(dataset=dataset, data_dir=data_dir)
  construct_input_fns(dataset, data_dir)