def visualize_training(experiment_name):
    """Plot learning curves for one experiment run and print where they went.

    :param experiment_name: name of the experiment whose statistics to plot
    :return: None
    """
    run_name = "run"
    run_dir = f'{experiment_name}_{run_name}'
    stats = load_training_statistics(experiment_name, run_name)
    output_dir = get_ludwig_output_dir() / 'viz' / run_dir
    learning_curves(
        [stats],
        output_feature_name='recommended',
        model_names=[run_dir],
        output_directory=output_dir,
        file_format='png',
    )
    # `output_dir` name is part of the printed text (f-string `=` form).
    print(f'{output_dir=}')
def test_learning_curves_vis_api(experiment_to_use):
    """Ensure pdf and png figures can be saved via visualization API call.

    :param experiment_to_use: Object containing trained model and results to
        test visualization
    :return: None
    """
    results = experiment_to_use
    with TemporaryDirectory() as tmpvizdir:
        for file_format in ("pdf", "png"):
            visualize.learning_curves(
                [results.train_stats],
                output_feature_name=None,
                output_directory=tmpvizdir,
                file_format=file_format,
            )
            # Each call should have produced exactly three figures of the
            # requested format (previous formats don't match this glob).
            saved_figures = glob.glob(tmpvizdir + f"/*.{file_format}")
            assert len(saved_figures) == 3
def test_learning_curves_vis_api(csv_filename):
    """Ensure pdf and png figures can be saved via visualization API call.

    :param csv_filename: csv fixture from tests.fixtures.filenames.csv_filename
    :return: None
    """
    experiment = Experiment(csv_filename)
    for fmt in ('pdf', 'png'):
        visualize.learning_curves(
            experiment.train_stats,
            output_feature_name=None,
            output_directory=experiment.output_dir,
            file_format=fmt,
        )
        # Count only figures of the format just produced.
        figures = glob.glob('{}/*.{}'.format(experiment.output_dir, fmt))
        assert len(figures) == 4
    shutil.rmtree(experiment.output_dir, ignore_errors=True)
def visualize_training():
    """Plot learning curves for the hard-coded 'rt' experiment.

    Currently disabled: raises ``AssertionError`` unconditionally before
    doing any work; the body below is kept for reference only.

    :raises AssertionError: always.
    """
    # The original used a bare `assert False` to disable this function.
    # `assert` statements are stripped under `python -O`, which would have
    # silently re-enabled the stale code below.  Raise the same exception
    # type explicitly so the disable survives optimized mode and callers
    # catching AssertionError are unaffected.
    raise AssertionError('visualize_training is disabled')
    experiment_name = 'rt'
    model_name = "run"
    experiment_dir = experiment_name + '_' + model_name
    training_statistics = load_training_statistics(experiment_name, model_name)
    output_directory = SCRIPT_DIR / 'output' / 'visualizations'
    list_of_stats = [training_statistics]
    list_of_models = [experiment_dir]
    learning_curves(
        list_of_stats,
        output_feature_name='recommended',
        model_names=list_of_models,
        output_directory=output_directory,
        file_format='png',
    )
def visualize_training_bak():
    """Visualize the newest run found under output/results.

    Statistics loading is currently commented out, so an empty statistics
    list is passed to ``learning_curves`` for the latest results directory.
    """
    results_path = SCRIPT_DIR / 'output' / 'results'
    run_dirs = [entry for entry in results_path.glob('*') if entry.is_dir()]
    if not run_dirs:
        sys.exit(f'Cannot find results dir in {results_path}')
    # Lexicographically greatest directory name is treated as the latest run.
    experiment_dir = max(run_dirs, key=lambda d: d.name)
    print(f'get training statistics from {experiment_dir}')
    # training_statistics = load_training_statistics(experiment_dir)
    output_directory = (SCRIPT_DIR / 'output' / 'visualizations'
                        / experiment_dir.name)
    # list_of_stats = [training_statistics]
    stats: List[str] = []
    learning_curves(
        stats,
        output_feature_name='recommended',
        model_names=[experiment_dir.name],
        output_directory=output_directory,
        file_format='png',
    )
# set up Python dictionary to hold model training parameters model_definition = base_model.copy() model_definition['input_features'][0]['fc_layers'] = model_option.fc_layers model_definition['training']['epochs'] = 8 # Define Ludwig model object that drive model training model = LudwigModel(model_definition, logging_level=logging.INFO) # initiate model training train_stats = model.train(data_csv='./data/mnist_dataset_training.csv', experiment_name='multiple_experiment', model_name=model_option.name) # save training stats for later use list_of_train_stats.append( TrainingResult(name=model_option.name, train_stats=train_stats)) print('>>>>>>> completed: ', model_option.name, '\n') model.close() # generating learning curves from training option_names = [trs.name for trs in list_of_train_stats] train_stats = [trs.train_stats for trs in list_of_train_stats] learning_curves(train_stats, 'Survived', model_names=option_names, output_directory='./visualizations', file_format='png')
# Drop figures left over from any previous run.
shutil.rmtree('./visualizations', ignore_errors=True)

# Models to train — one config file per id.
model_ids = ['model1', 'model2']
collected_stats = []

# ## Train models
for model_id in model_ids:
    print('>>>> training: ', model_id)

    # One Ludwig model per config file.
    ludwig_model = LudwigModel(config=f'./{model_id}_config.yaml',
                               logging_level=logging.WARN)

    # Train; only the training statistics are kept for plotting.
    stats, _, _ = ludwig_model.train(dataset='./data/train.csv',
                                     experiment_name='multiple_experiment',
                                     model_name=model_id)
    collected_stats.append(stats)
    print('>>>>>>> completed: ', model_id, '\n')

# generating learning curves from training
learning_curves(collected_stats, 'Survived',
                model_names=model_ids,
                output_directory='./visualizations',
                file_format='png')
# set up Python dictionary to hold model training parameters config = base_model.copy() config["input_features"][0]["fc_layers"] = model_option.fc_layers config[TRAINER]["epochs"] = 5 # Define Ludwig model object that drive model training model = LudwigModel(config, logging_level=logging.INFO) # initiate model training train_stats, _, _ = model.train( training_set=training_set, test_set=test_set, experiment_name="multiple_experiment", model_name=model_option.name, ) # save training stats for later use list_of_train_stats.append( TrainingResult(name=model_option.name, train_stats=train_stats)) print(">>>>>>> completed: ", model_option.name, "\n") # generating learning curves from training option_names = [trs.name for trs in list_of_train_stats] train_stats = [trs.train_stats for trs in list_of_train_stats] learning_curves(train_stats, "Survived", model_names=option_names, output_directory="./visualizations", file_format="png")
# list models to train
model_ids = ["model1", "model2"]
collected_stats = []

# Only the training split is needed here.
training_set, _, _ = titanic.load(split=True)

# ## Train models
for model_id in model_ids:
    print(">>>> training: ", model_id)

    # Define Ludwig model object that drive model training
    ludwig_model = LudwigModel(config=f"./{model_id}_config.yaml",
                               logging_level=logging.WARN)

    # Train; only the statistics are kept for plotting.
    stats, _, _ = ludwig_model.train(dataset=training_set,
                                     experiment_name="multiple_experiment",
                                     model_name=model_id)
    collected_stats.append(stats)
    print(">>>>>>> completed: ", model_id, "\n")

# generating learning curves from training
learning_curves(
    collected_stats,
    "Survived",
    model_names=model_ids,
    output_directory="./visualizations",
    file_format="png",
)
# # Example demonstrating visual api
#
from ludwig.visualize import learning_curves
import json

# read in training statistics
stats_path = './results_api/api_experiment_run/training_statistics.json'
with open(stats_path) as stats_file:
    training_stats = json.load(stats_file)

# generating learning curves for the 'label' output feature
learning_curves(
    training_stats,
    'label',
    output_directory='./viz_api',
    file_format='png',
)
"./profile_images") with open("./config.yaml") as f: config = yaml.safe_load(f.read()) model = LudwigModel(config, logging_level=logging.INFO) train_stats, preprocessed_data, output_directory = model.train( dataset=training_set) # Generates predictions and performance statistics for the test set. test_stats, predictions, output_directory = model.evaluate( test_set, collect_predictions=True, collect_overall_stats=True) confusion_matrix( [test_stats], model.training_set_metadata, "account_type", top_n_classes=[2], model_names=[""], normalize=True, output_directory="./visualizations", file_format="png", ) # Visualizes learning curves, which show how performance metrics changed over time during training. learning_curves(train_stats, output_feature_name="account_type", output_directory="./visualizations", file_format="png")
# Train a Ludwig model on the breast-cancer training set and time the run.
start_time = time.time()
model = LudwigModel(model_definition_file='./LudwigModelDefinitionFile.yml')
train_stats = model.train(data_df=breast_cancer_dataset_train,
                          skip_save_model=True,
                          skip_save_processed_input=True,
                          skip_save_training_statistics=True,
                          skip_save_training_description=True,
                          skip_save_log=True,
                          skip_save_progress=True)
training_time = time.time() - start_time

# Visualize training statistics
from ludwig.visualize import learning_curves
learning_curves(train_stats, output_feature_name='label')

# Predict and print statistics
pred = model.predict(data_df=X_test)
predictions = pred['label_predictions']
Y_test = Y_test == 1  # Change labels from 0/1 to False/True

# Compare predictions with ground truth positionally.
# BUG FIX: the original loop ran `for i in range(1, len(Y_test))` indexing
# with `i - 1`, covering rows 0..len-2 only — the last row was never
# compared and was therefore always counted as an incorrect prediction.
pred_correct = []
for i in range(len(Y_test)):
    pred_correct.append(predictions.iloc[i] == Y_test.iloc[i])

print("No. of correct predictions = {}".format(sum(pred_correct)))
print("No. of incorrect predictions = {}".format(
    len(Y_test) - sum(pred_correct)))
print("Training time = {} seconds".format(round(training_time, 2)))