import os

from caffe2.python import net_drawer, workspace
from tqdm import tqdm

# initialize, build_training_model, build_validation_model, and Experiment are
# project-local helpers assumed to be defined or imported elsewhere in this repo.


# NOTE: earlier revision of run_main; shadowed by the second definition below.
def run_main(config):
    '''Run MAMC training & validation.'''
    # init model
    initialize(config)
    training_model = build_training_model(config)
    validation_model = build_validation_model(config)

    # print network graph
    """
    # full graph
    mamc_graph = net_drawer.GetPydotGraph(
        validation_model.net.Proto().op,
        "mamc_graph",
        rankdir="TB",
    )
    mamc_graph.write_svg("mamc_no_npairloss_graph.svg")
    print("write graph over...")
    sys.exit(0)

    # # mini graph
    # mamc_graph_mini = net_drawer.GetPydotGraphMinimal(
    #     validation_model.net.Proto().op,
    #     "mamc_graph_minimal",
    #     rankdir="TB",
    #     minimal_dependency=True,
    # )
    # mamc_graph_mini.write_svg("mamc_no_npairloss_graph_mini.svg")
    # print("write graph over...")
    # sys.exit(0)
    """

    # experiment params config
    # training mode
    # tag = "imagenet"
    tag = "cars196"
    if config['finetune']:
        tag = 'FINETUNE-{}'.format(tag)
    else:
        tag = 'RETRAIN-{}'.format(tag)
    root_experiments_dir = os.path.join(config['root_dir'], 'experiments')
    experiment = Experiment(root_experiments_dir, tag)
    experiment.add_config_file(config['config_path'])

    # add charts
    chart_acc = experiment.add_chart(
        'accuracy', xlabel='epochs', ylabel='accuracy')
    chart_acc_5 = experiment.add_chart(
        'accuracy_5', xlabel='epochs', ylabel='accuracy_5')
    chart_softmax_loss = experiment.add_chart(
        'softmax_loss', xlabel='epochs', ylabel='softmax_loss')

    # plot params (these should be moved into the 'experiment' module)
    # TODO: add a 'variable' object to the Experiment class
    training_acc_statistics = []
    training_acc5_statistics = []
    training_softmax_loss_statistics = []
    epoch_training_acc = 0
    epoch_training_acc5 = 0
    epoch_training_softmax_loss = 0
    training_accuracy = 0
    training_accuracy_5 = 0
    training_softmax_loss = 0
    validation_acc_statistics = []
    validation_acc5_statistics = []
    validation_softmax_loss_statistics = []

    # run the model
    experiment.add_log(
        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    )
    for training_iter in tqdm(range(config['solver']['max_iterations'])):
        workspace.RunNet(training_model.net)
        accuracy = workspace.FetchBlob('accuracy')
        accuracy_5 = workspace.FetchBlob('accuracy_5')
        softmax_loss = workspace.FetchBlob('softmax_loss')
        epoch_training_acc += accuracy
        epoch_training_acc5 += accuracy_5
        epoch_training_softmax_loss += softmax_loss
        training_accuracy += accuracy
        training_accuracy_5 += accuracy_5
        training_softmax_loss += softmax_loss

        # display training results
        if training_iter != 0 and \
                (training_iter + 1) % config['solver']['display'] == 0:
            experiment.add_log(
                "[TRAIN] epoch: {} iteration: {} accuracy: {:.4f} "
                "accuracy_5: {:.4f} softmax_loss: {:.4f}".format(
                    (training_iter // config['solver']['train_iterations'] + 1),
                    training_iter,
                    training_accuracy / config['solver']['display'],
                    training_accuracy_5 / config['solver']['display'],
                    training_softmax_loss / config['solver']['display'],
                ))
            experiment.add_log("Global learning rate: {}".format(
                workspace.FetchBlob(
                    'MultiPrecisionSgdOptimizer_0_lr_gpu{}'.format(
                        config['gpu_id']))))
            # clean up the running counters
            training_accuracy = training_accuracy_5 = training_softmax_loss = 0

        # plot training statistics every epoch
        if training_iter != 0 and \
                (training_iter + 1) % config['solver']['train_iterations'] == 0:
            training_acc_statistics.append(
                epoch_training_acc / config['solver']['train_iterations'])
            training_acc5_statistics.append(
                epoch_training_acc5 / config['solver']['train_iterations'])
            training_softmax_loss_statistics.append(
                epoch_training_softmax_loss /
                config['solver']['train_iterations'])
            epoch_training_acc = 0
            epoch_training_acc5 = 0
            epoch_training_softmax_loss = 0
            experiment.add_plot(
                chart_acc, training_acc_statistics, 'r.--', 'training')
            experiment.add_plot(
                chart_acc_5, training_acc5_statistics, 'r.--', 'training')
            experiment.add_plot(
                chart_softmax_loss, training_softmax_loss_statistics,
                'b+--', 'training')

        # snapshot training model params
        if training_iter != 0 and \
                (training_iter + 1) % config['solver']['snapshot'] == 0:
            # save the model weights
            print("[INFO] snapshot the model.....")
            experiment.add_init_net_snapshot(
                training_model.GetAllParams(),
                workspace,
                config['name'],
                tag,
                (training_iter // config['solver']['train_iterations'] + 1),
            )
            print("[INFO] snapshot the model. Done.....")

        # start to validate the model
        if training_iter != 0 and \
                (training_iter + 1) % config['solver']['test_interval'] == 0:
            test_accuracy = 0
            test_accuracy_5 = 0
            test_softmax_loss = 0
            for test_iter in range(config['solver']['test_iterations']):
                workspace.RunNet(validation_model.net)
                accuracy = workspace.FetchBlob('accuracy')
                accuracy_5 = workspace.FetchBlob('accuracy_5')
                softmax_loss = workspace.FetchBlob('softmax_loss')
                # update counters
                test_accuracy += accuracy
                test_accuracy_5 += accuracy_5
                test_softmax_loss += softmax_loss
                experiment.add_log(
                    "[VALIDATION] accuracy: {:.4f} accuracy_5: {:.4f} "
                    "softmax_loss: {:.4f}".format(
                        accuracy, accuracy_5, softmax_loss))
            # end validation
            experiment.add_log(
                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            )
            experiment.add_log(
                "[VALIDATION] avg_acc: {:.4f} avg_acc_5: {:.4f} "
                "avg_softmax_loss: {:.4f}".format(
                    test_accuracy / config['solver']['test_iterations'],
                    test_accuracy_5 / config['solver']['test_iterations'],
                    test_softmax_loss / config['solver']['test_iterations'],
                ))
            # plot validation statistics
            validation_acc_statistics.append(
                test_accuracy / config['solver']['test_iterations'])
            validation_acc5_statistics.append(
                test_accuracy_5 / config['solver']['test_iterations'])
            validation_softmax_loss_statistics.append(
                test_softmax_loss / config['solver']['test_iterations'])
            experiment.add_plot(
                chart_acc, validation_acc_statistics, 'c.--', 'validation')
            experiment.add_plot(
                chart_acc_5, validation_acc5_statistics, 'c.--', 'validation')
            experiment.add_plot(
                chart_softmax_loss, validation_softmax_loss_statistics,
                'g+--', 'validation')
            experiment.add_log(
                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            )
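

# A minimal sketch of the generate_network_graph helper that the revision below
# references but does not define here. It is reconstructed from the inline
# net_drawer calls in the revision above; the exact signature, the `tag`
# handling, and the output file names are assumptions, not the original
# implementation.
def generate_network_graph(model, config, tag='', use_mini=False):
    """Write the model's operator graph to an SVG file via Caffe2's net_drawer."""
    suffix = '-{}'.format(tag) if tag else ''
    if use_mini:
        # minimal-dependency graph: collapses pass-through blobs for readability
        graph = net_drawer.GetPydotGraphMinimal(
            model.net.Proto().op,
            "{}_graph_minimal".format(config['name']),
            rankdir="TB",
            minimal_dependency=True,
        )
        graph.write_svg("{}{}_graph_mini.svg".format(config['name'], suffix))
    else:
        # full graph: every operator and blob dependency
        graph = net_drawer.GetPydotGraph(
            model.net.Proto().op,
            "{}_graph".format(config['name']),
            rankdir="TB",
        )
        graph.write_svg("{}{}_graph.svg".format(config['name'], suffix))
    print("write graph over...")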
# Current revision of run_main: the experiment directory is derived from the
# dataset/model names, the best validation accuracy is tracked, and snapshots
# are taken at validation time instead of on a fixed iteration schedule.
def run_main(config):
    '''Run MAMC training & validation.'''
    # init model
    initialize(config)

    # build models
    training_model = build_training_model(config)
    validation_model = build_validation_model(config)

    # print network graph
    # generate_network_graph(validation_model, config, use_mini=True)
    # generate_network_graph(validation_model, config, use_mini=False)
    # generate_network_graph(validation_model, config, tag="topk", use_mini=True)
    # generate_network_graph(validation_model, config, tag="topk", use_mini=False)
    # sys.exit(0)

    # experiment params config
    root_experiments_dir = os.path.join(config['root_dir'], 'experiments')
    assert config['dataset_name'] is not None
    if config['model_name'] is not None:
        root_experiments_dir = os.path.join(
            root_experiments_dir,
            "{}-{}".format(config['dataset_name'], config['model_name']),
        )
    else:
        root_experiments_dir = os.path.join(
            root_experiments_dir, config['dataset_name'])
    config_file_name = os.path.splitext(
        os.path.basename(config['config_path']))[0]
    tag = "{}-{}".format(config['name'], config_file_name)
    experiment = Experiment(root_experiments_dir, tag)
    experiment.add_config_file(config['config_path'])

    # add charts
    chart_acc = experiment.add_chart(
        'accuracy', xlabel='epochs', ylabel='accuracy')
    chart_acc_5 = experiment.add_chart(
        'accuracy_5', xlabel='epochs', ylabel='accuracy_5')
    chart_softmax_loss = experiment.add_chart(
        'softmax_loss', xlabel='epochs', ylabel='softmax_loss')

    # plot params (these should be moved into the 'experiment' module)
    # TODO: add a 'variable' object to the Experiment class
    training_acc_statistics = []
    training_acc5_statistics = []
    training_softmax_loss_statistics = []
    epoch_training_acc = 0
    epoch_training_acc5 = 0
    epoch_training_softmax_loss = 0
    training_accuracy = 0
    training_accuracy_5 = 0
    training_softmax_loss = 0
    validation_acc_statistics = []
    validation_acc5_statistics = []
    validation_softmax_loss_statistics = []
    best_acc = 0

    # run the model
    experiment.add_log(
        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    )
    for training_iter in tqdm(range(config['solver']['max_iterations'])):
        workspace.RunNet(training_model.net)
        accuracy = workspace.FetchBlob('accuracy')
        accuracy_5 = workspace.FetchBlob('accuracy_5')
        softmax_loss = workspace.FetchBlob('softmax_loss')
        epoch_training_acc += accuracy
        epoch_training_acc5 += accuracy_5
        epoch_training_softmax_loss += softmax_loss
        training_accuracy += accuracy
        training_accuracy_5 += accuracy_5
        training_softmax_loss += softmax_loss

        # display training results
        if training_iter != 0 and \
                (training_iter + 1) % config['solver']['display'] == 0:
            experiment.add_log(
                "[TRAIN] epoch: {} iteration: {} accuracy: {:.4f} "
                "accuracy_5: {:.4f} softmax_loss: {:.4f}".format(
                    (training_iter // config['solver']['train_iterations'] + 1),
                    training_iter,
                    training_accuracy / config['solver']['display'],
                    training_accuracy_5 / config['solver']['display'],
                    training_softmax_loss / config['solver']['display'],
                ))
            experiment.add_log("Global learning rate: {}".format(
                workspace.FetchBlob(
                    'MultiPrecisionSgdOptimizer_0_lr_gpu{}'.format(
                        config['gpu_id']))))
            # clean up the running counters
            training_accuracy = training_accuracy_5 = training_softmax_loss = 0

        # plot training statistics every epoch
        if training_iter != 0 and \
                (training_iter + 1) % config['solver']['train_iterations'] == 0:
            training_acc_statistics.append(
                epoch_training_acc / config['solver']['train_iterations'])
            training_acc5_statistics.append(
                epoch_training_acc5 / config['solver']['train_iterations'])
            training_softmax_loss_statistics.append(
                epoch_training_softmax_loss /
                config['solver']['train_iterations'])
            epoch_training_acc = 0
            epoch_training_acc5 = 0
            epoch_training_softmax_loss = 0
            experiment.add_plot(
                chart_acc, training_acc_statistics, 'r.--', 'training')
            experiment.add_plot(
                chart_acc_5, training_acc5_statistics, 'r.--', 'training')
            experiment.add_plot(
                chart_softmax_loss, training_softmax_loss_statistics,
                'b+--', 'training')

        # start to validate the model
        if training_iter != 0 and \
                (training_iter + 1) % config['solver']['test_interval'] == 0:
            test_accuracy = 0
            test_accuracy_5 = 0
            test_softmax_loss = 0
            for test_iter in range(config['solver']['test_iterations']):
                workspace.RunNet(validation_model.net)
                accuracy = workspace.FetchBlob('accuracy')
                accuracy_5 = workspace.FetchBlob('accuracy_5')
                softmax_loss = workspace.FetchBlob('softmax_loss')
                # update counters
                test_accuracy += accuracy
                test_accuracy_5 += accuracy_5
                test_softmax_loss += softmax_loss
                experiment.add_log(
                    "[VALIDATION] accuracy: {:.4f} accuracy_5: {:.4f} "
                    "softmax_loss: {:.4f}".format(
                        accuracy, accuracy_5, softmax_loss))
            # end validation; track the best average validation accuracy
            if test_accuracy / config['solver']['test_iterations'] > best_acc:
                best_acc = test_accuracy / config['solver']['test_iterations']
            experiment.add_log(
                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            )
            experiment.add_log(
                "[VALIDATION] avg_acc: {:.4f} best_acc: {:.4f} avg_acc_5: {:.4f} "
                "avg_softmax_loss: {:.4f}".format(
                    test_accuracy / config['solver']['test_iterations'],
                    best_acc,
                    test_accuracy_5 / config['solver']['test_iterations'],
                    test_softmax_loss / config['solver']['test_iterations'],
                ))

            # snapshot training model params
            print("[INFO] snapshot the model.....")
            experiment.add_init_net_snapshot(
                training_model.GetAllParams(),
                workspace,
                config,
                (training_iter // config['solver']['train_iterations'] + 1),
                test_accuracy / config['solver']['test_iterations'],
                best_acc,
            )
            print("[INFO] snapshot the model. Done.....")

            # plot validation statistics
            validation_acc_statistics.append(
                test_accuracy / config['solver']['test_iterations'])
            validation_acc5_statistics.append(
                test_accuracy_5 / config['solver']['test_iterations'])
            validation_softmax_loss_statistics.append(
                test_softmax_loss / config['solver']['test_iterations'])
            experiment.add_plot(
                chart_acc, validation_acc_statistics, 'c.--', 'validation')
            experiment.add_plot(
                chart_acc_5, validation_acc5_statistics, 'c.--', 'validation')
            experiment.add_plot(
                chart_softmax_loss, validation_softmax_loss_statistics,
                'g+--', 'validation')
            experiment.add_log(
                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            )
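

# A minimal, hypothetical entry point showing how run_main might be invoked.
# The original driver is not shown here, so the YAML config format, the
# argument name, and the PyYAML dependency are assumptions; the only grounded
# detail is that run_main expects config['config_path'] to point at the
# experiment config file (Experiment.add_config_file() reads it).
if __name__ == '__main__':
    import argparse

    import yaml

    parser = argparse.ArgumentParser(description='MAMC training & validation')
    parser.add_argument('config_path', help='path to a YAML experiment config')
    args = parser.parse_args()

    # load the experiment config and remember where it came from
    with open(args.config_path) as f:
        config = yaml.safe_load(f)
    config['config_path'] = args.config_path

    run_main(config)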