def test_experiment_model_resume(csv_filename):
    """Train, resume training from a saved model, then predict.

    Single sequence input feature, single category output feature.
    Verifies that a saved experiment directory can be used both as a
    ``model_resume_path`` for further training and as a model source
    for ``full_predict``.
    """
    in_features = [sequence_feature(encoder='rnn', reduce_output='sum')]
    out_features = [categorical_feature(vocab_size=2, reduce_input='sum')]

    # Synthesize a CSV matching the declared features.
    data_csv_path = generate_data(in_features, out_features, csv_filename)

    model_definition = {
        'input_features': in_features,
        'output_features': out_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2},
    }

    # First run produces the experiment directory we resume from.
    results_dir = experiment(model_definition, data_csv=data_csv_path)
    logging.info('Experiment Directory: {0}'.format(results_dir))

    # Second run resumes training from the saved state.
    experiment(
        model_definition,
        data_csv=data_csv_path,
        model_resume_path=results_dir,
    )

    # The resumed model must still be loadable for prediction.
    full_predict(os.path.join(results_dir, 'model'), data_csv=data_csv_path)

    # Best-effort cleanup of the experiment artifacts.
    shutil.rmtree(results_dir, ignore_errors=True)
def test_experiment_model_resume(csv_filename):
    """Train, resume training from a saved model, then predict.

    Single sequence input feature, single category output feature,
    declared via YAML fragments substituted into the module-level
    ``model_definition_template``.

    NOTE(review): this duplicates the name of another
    ``test_experiment_model_resume`` in this file — when both live in the
    same module, the later definition shadows the earlier one; confirm
    that is intended.
    """
    input_features = '[{name: utterance, type: sequence, vocab_size: 10,' \
                     ' max_len: 10, encoder: rnn, reduce_output: sum}]'
    output_features = "[{name: intent, type: category, vocab_size: 2," \
                      " reduce_input: sum}] "

    # Synthesize a CSV matching the declared features.
    rel_path = generate_data(input_features, output_features, csv_filename)

    model_definition = model_definition_template.substitute(
        input_name=input_features,
        output_name=output_features)

    # safe_load instead of the deprecated/unsafe bare yaml.load: the model
    # definition is plain scalars/mappings, so safe_load parses it identically
    # without allowing arbitrary object construction.
    exp_dir_name = experiment(
        yaml.safe_load(model_definition),
        data_csv=rel_path)
    logging.info('Experiment Directory: {0}'.format(exp_dir_name))

    # Second run resumes training from the saved state.
    experiment(
        yaml.safe_load(model_definition),
        data_csv=rel_path,
        model_resume_path=exp_dir_name)

    # The resumed model must still be loadable for prediction.
    full_predict(os.path.join(exp_dir_name, 'model'), data_csv=rel_path)

    # Clean up the experiment directory, matching the other tests in this
    # file (previously this test leaked its results directory).
    shutil.rmtree(exp_dir_name, ignore_errors=True)
def cli(sys_argv):
    """Command-line entry point for ``ludwig test``.

    Parses ``sys_argv``, configures logging and horovod master status,
    then delegates to ``full_predict`` with ``evaluate_performance=True``
    so predictions are compared against ground truth.

    :param sys_argv: list of command-line arguments (excluding the
        program name), as passed to ``argparse``.
    """
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
                    'and tests its performance by comparing '  # fixed: missing space before "its"
                    'its predictions with ground truth.',
        prog='ludwig test',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    # Exactly one of --data_csv / --data_hdf5 must be provided.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--data_csv',
        help='input data CSV file. '
             'If it has a split column, it will be used for splitting '
             '(0: train, 1: validation, 2: test), '
             'otherwise the dataset will be randomly split')
    group.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocess version of'
             ' the input CSV created the first time a CSV file is used in the '
             'same directory with the same name and a hdf5 extension')
    parser.add_argument(
        '--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate preprocess file '
             'containing the mappings of the input CSV created the first time '
             'a CSV file is used in the same directory with the same name and '
             'a json extension')
    parser.add_argument('-s', '--split', default=TEST,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to test the model on')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m', '--model_path', help='model to load',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od', '--output_directory', type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('-ssuo', '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true', default=False)

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs', '--batch_size', type=int, default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    # NOTE(review): help text says "list of gpu" but type=int accepts a
    # single integer (no nargs) — confirm whether this should be nargs='+'.
    parser.add_argument('-g', '--gpus', type=int, default=0,
                        help='list of gpu to use')
    parser.add_argument(
        '-gf', '--gpu_fraction', type=float, default=1.0,
        help='fraction of gpu memory to initialize the process with')
    parser.add_argument('-uh', '--use_horovod', action='store_true',
                        default=False,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg', '--debug', action='store_true', default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l', '--logging_level', default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)
    # Distinguishes `ludwig test` from plain prediction downstream.
    args.evaluate_performance = True

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])

    set_on_master(args.use_horovod)

    # Only the (horovod) master process prints the banner.
    if is_on_master():
        print_ludwig('Test', LUDWIG_VERSION)

    full_predict(**vars(args))