def eval(logdir):
    # Load graph
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)

    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names())
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)

    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(df().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    _, idx2phn = load_vocab()
    y_ppg_1d = [idx2phn[i] for i in y_ppg_1d]
    pred_ppg_1d = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(y_ppg_1d, pred_ppg_1d, phns)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.add_summary(summ_acc)
    writer.add_summary(summ_cm)
    writer.close()
def train(logdir_train1, logdir_train2):
    # Load model net1
    net1_model = Net1(hp.default.phns_len)
    checkpoint_path1 = '{}/checkpoint.tar'.format(logdir_train1)
    checkpoint1 = torch.load(checkpoint_path1)
    if checkpoint1:
        net1_model.load_state_dict(checkpoint1['model_state_dict'])

    # Load model net2
    net2_model = Net2()
    checkpoint_path2 = '{}/checkpoint.tar'.format(logdir_train2)
    checkpoint2 = None
    epoch = 0
    loss = 100
    lr = hp.train2.lr
    data_dir = hp.train2.data_path
    optimizer = torch.optim.Adam(net2_model.parameters(), lr=lr)
    if os.path.exists(checkpoint_path2):
        checkpoint2 = torch.load(checkpoint_path2)
    if checkpoint2:
        train_list, eval_list = load_train_eval_lists(logdir_train2)
        logger.info("Reuse existing train_list, eval_list from {}".format(logdir_train2))
        net2_model.load_state_dict(checkpoint2['model_state_dict'])
        optimizer.load_state_dict(checkpoint2['optimizer_state_dict'])
        lr = optimizer.param_groups[0]['lr']
        epoch = checkpoint2['epoch']
        loss = checkpoint2['loss']
        logger.debug("Checkpoint loaded")
    else:
        train_list, eval_list, _ = generate_data_list(logdir_train2, data_dir, 0.8, 0.1, 0.1)
        logger.info("Generate new train_list, eval_list, test_list.")

    net2_model.train()  # Set to train mode

    # Create train/valid loader
    training_set = Net2DataDir(os.path.join(data_dir, 'train'))
    training_loader = DataLoader(training_set,
                                 batch_size=hp.train2.batch_size,
                                 shuffle=True,
                                 drop_last=True,
                                 num_workers=hp.train2.num_workers)
    logger.debug("Training loader created. Size: {} samples".format(training_set.size))

    validation_set = Net2DataDir(os.path.join(data_dir, 'eval'))
    # If batch_size is inconsistent at the last batch, audio_utils.net2_out_to_pdf fails.
    # TODO: not sure if validation_loader requires a separate batch size such as
    # 'eval2.batch_size'; maybe implement later.
    validation_loader = DataLoader(validation_set,
                                   batch_size=hp.train2.batch_size,
                                   shuffle=True,
                                   drop_last=True,
                                   num_workers=hp.eval2.num_workers)
    logger.debug("Validation loader created. Size: {} samples".format(validation_set.size))

    # Create criterion
    criterion = MyMSELoss()
    logger.debug("Loss type: Sum of MSE loss on mel spectrogram and linear spectrogram")

    # Run model
    net2_model, epoch, best_loss = net2_train(checkpoint_path2, net1_model, net2_model,
                                              training_loader, validation_loader, criterion,
                                              epoch, device=hp.train2.device, lr=lr, loss=loss)
def train(args, logdir):
    # model
    model = Net1()

    preprocessing(data_path)
    preprocessing(test_path)

    # dataflow
    df = Net1DataFlow(data_path, hp.train1.batch_size)
    df_test = Net1DataFlow(test_path, hp.train1.batch_size)
    # datas = df.get_data()
    # print(datas[1])

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    # session_conf = tf.ConfigProto(
    #     gpu_options=tf.GPUOptions(
    #         allow_growth=True,
    #     ),)

    # cv test code
    # https://github.com/tensorpack/tensorpack/blob/master/examples/boilerplate.py
    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=hp.train1.batch_size * 10, n_thread=1)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            InferenceRunner(
                df_test(n_prefetch=1),
                ScalarStats(['net1/eval/loss', 'net1/eval/acc'], prefix='')),
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
        # session_config=session_conf
    )

    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    num_gpu = hp.train1.num_gpu
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))
        num_gpu = len(args.gpu.split(','))
        trainer = SyncMultiGPUTrainerReplicated(num_gpu)
    else:
        trainer = SimpleTrainer()

    launch_train_with_config(train_conf, trainer=trainer)
def train(logdir_train1):
    # load model
    net1_model = Net1(hp.default.phns_len)
    optimizer = torch.optim.Adam(net1_model.parameters(), lr=hp.train1.lr)
    checkpoint_path = '{}/checkpoint.tar'.format(logdir_train1)
    checkpoint = None
    epoch = 0
    loss = 100.0
    lr = hp.train1.lr
    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
    if checkpoint:
        net1_model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        lr = optimizer.param_groups[0]['lr']
        epoch = checkpoint['epoch']
        loss = checkpoint['loss']
        logger.debug("Checkpoint loaded")

    net1_model.train()  # Set to train mode

    # create train/valid loader
    training_set = Net1TimitData(hp.train1.data_path)
    training_loader = DataLoader(training_set,
                                 batch_size=hp.train1.batch_size,
                                 shuffle=True,
                                 drop_last=True,
                                 num_workers=hp.train1.num_workers)
    logger.debug("Training loader created. Size: {} samples".format(training_set.size))

    validation_set = Net1TimitData(hp.eval1.data_path)
    validation_loader = DataLoader(validation_set,
                                   batch_size=hp.eval1.batch_size,
                                   shuffle=True,
                                   drop_last=False,
                                   num_workers=hp.eval1.num_workers)
    logger.debug("Evaluation loader created. Size: {} samples".format(validation_set.size))

    # create criterion
    criterion = MaskedCrossEntropyLoss()
    logger.debug("Loss type: Masked Cross Entropy Loss")

    # run model
    net1_model, epoch, loss = net1_train(checkpoint_path, net1_model, training_loader,
                                         validation_loader, criterion, epoch,
                                         device=hp.train1.device, lr=lr, loss=loss)
def train(args, logdir):
    # model
    print("####model")
    model = Net1()

    # dataflow
    print("####dataflow")
    df = Net1DataFlow(hp.Train1.data_path, hp.Train1.batch_size)

    # set logger for event and model saver
    print("####logger")
    logger.set_logger_dir(logdir)

    print("####session_conf")
    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True),
        allow_soft_placement=True)

    print("####train_conf")
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=5)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.Train1.num_epochs,
        steps_per_epoch=hp.Train1.steps_per_epoch,
        session_config=session_conf)

    print("####ckpt")
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    print("####trainer")
    trainer = SyncMultiGPUTrainerReplicated(hp.Train1.num_gpu)

    print("####launch_train_with_config")
    launch_train_with_config(train_conf, trainer=trainer)
def init_predictor(ckpt_dir):
    """
    Initializes an OfflinePredictor for the 'Net1' phoneme classifier,
    given a directory of tf-checkpoints.

    :param ckpt_dir: Checkpoint directory.
    :return: OfflinePredictor
    """
    ckpt1 = tf.train.latest_checkpoint(ckpt_dir)
    assert ckpt1 is not None, "Failed to load checkpoint in '{}'".format(ckpt_dir)

    net1 = Net1()
    pred_conf = PredictConfig(
        model=net1,
        input_names=['x_mfccs'],
        output_names=['net1/ppgs'],
        session_init=ChainInit([SaverRestore(ckpt1, ignore=['global_step'])]))
    predictor = OfflinePredictor(pred_conf)
    return predictor
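# Usage sketch (an assumption, not part of the source): a tensorpack OfflinePredictor is
# called positionally with arrays matching input_names and returns a tuple matching
# output_names, as in eval() above. The checkpoint directory and dummy MFCC shape below
# are placeholders.
import numpy as np

predictor = init_predictor('logdir/train1')          # hypothetical checkpoint directory
x_mfccs = np.zeros((1, 100, 40), dtype=np.float32)   # (batch, time, n_mfcc) placeholder
ppgs, = predictor(x_mfccs)                           # phoneme posteriorgrams ('net1/ppgs')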
def train(args, logdir):
    # model
    model = Net1()

    # dataflow
    TIMIT_TRAIN_WAV = 'TIMIT/TRAIN/*/*/*.npz'
    TIMIT_TEST_WAV = 'TIMIT/TEST/*/*/*.npz'
    print(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TRAIN_WAV))
    print(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TEST_WAV))

    df = Net1DataFlow(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TRAIN_WAV),
                      hp.train1.batch_size)
    df_test = Net1DataFlow(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TEST_WAV),
                           hp.train1.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=8)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            InferenceRunner(
                df_test(n_prefetch=1),
                ScalarStats(['net1/eval/loss', 'net1/eval/acc'], prefix='')),
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
        # session_config=session_conf
    )

    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if hp.default.use_gpu == True:
        os.environ['CUDA_VISIBLE_DEVICES'] = hp.default.gpu_list
        train_conf.nr_tower = len(hp.default.gpu_list.split(','))
        num_gpu = len(hp.default.gpu_list.split(','))
        trainer = SyncMultiGPUTrainerReplicated(num_gpu)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        trainer = SimpleTrainer()

    launch_train_with_config(train_conf, trainer=trainer)
def convert(logdir_eval1, logdir_eval2):
    # Load model net1
    net1_model = Net1()
    checkpoint_path1 = '{}/checkpoint.tar'.format(logdir_eval1)
    checkpoint1 = torch.load(checkpoint_path1)
    if checkpoint1:
        net1_model.load_state_dict(checkpoint1['model_state_dict'])

    # Load model net2
    net2_model = Net2()
    checkpoint_path2 = '{}/checkpoint.tar'.format(logdir_eval2)
    checkpoint2 = torch.load(checkpoint_path2)
    if checkpoint2:
        net2_model.load_state_dict(checkpoint2['model_state_dict'])

    # Create conversion source loader
    conversion_source_set = Net2Data(hp.convert.data_path)
    conversion_source_loader = DataLoader(conversion_source_set,
                                          batch_size=hp.convert.batch_size,
                                          shuffle=False,
                                          drop_last=False)

    # Run model
    # Note: this presumably calls a conversion routine imported from another module;
    # as written, the name shadows this function itself.
    spectrogram_batch = convert(net1_model, net2_model, conversion_source_loader)
def train(args, logdir):
    # model
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.train1.data_path, hp.train1.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True),
    )

    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
        # session_config=session_conf
    )

    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    trainer = SimpleTrainer()
    # print('test stop')

    launch_train_with_config(train_conf, trainer=trainer)
# split baseline data
_, _, base_test_set = random_split(baseline_data,
                                   [num_training, num_val, num_test],
                                   generator=torch.Generator().manual_seed(42))
# split is data
_, _, is_test_set = random_split(is_data,
                                 [num_training, num_val, num_test],
                                 generator=torch.Generator().manual_seed(42))

# create dataloader objects
base_test_loader = DataLoader(base_test_set, batch_size=32, shuffle=False)
is_test_loader = DataLoader(is_test_set, batch_size=32, shuffle=False)

# create models
m1 = Net1(name='GCN')
m2 = Net2(name='kGNN')
m3 = Net3(name='kGNN_TopK')
m4 = Net4(name='GAT')
baseline_models = [m1, m2, m3, m4]

m5 = Net1(name='IS_GCN')
m6 = Net2(name='IS_kGNN')
m7 = Net3(name='IS_kGNN_TopK')
m8 = Net4(name='IS_GAT')
is_models = [m5, m6, m7, m8]


def eval(log):
    accuracy, f1_macro, precision, recall = 0, 0, 0, 0
# Import libraries
import torch
import pandas as pd

from models import Net1
from train_and_evaluate import *
from get_data import *

# Set up GPU integration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Only testing model 1
model1 = Net1()
model1.to(device)

# Dataframes to hold results of the transformations
trans_train_loss = pd.DataFrame(index=[x for x in range(30)])
trans_train_acc = pd.DataFrame(index=[x for x in range(30)])
trans_val_loss = pd.DataFrame(index=[x for x in range(30)])
trans_val_acc = pd.DataFrame(index=[x for x in range(30)])

no_trans_train_loss = []
no_trans_train_acc = []
no_trans_val_loss = []
no_trans_val_acc = []

flips_only_train_loss = []
flips_only_train_acc = []
flips_only_val_loss = []
flips_only_val_acc = []
batch_size = 50
learning_rate = 0.001
val_split = 0.1
shuffle_data = True
seed = 0
save_model = False
model_name = 'trained_models/model_ben_mar16_'

# Split data into validation and train partitions
train_sampler, valid_sampler = test_val_split(dataset, batch_size, val_split,
                                              shuffle_data, seed)
train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)

# select model
model = Net1(num_classes)

# Create model and specify loss/optimization functions
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

t2 = time.time()
print('Time to load data and split = {:.4f} seconds'.format(t2 - t1))

total_step = len(train_loader)
print('Beginning training with {} epochs'.format(num_epochs))

# run training
loss_list, batch_list, epoch_list = [], [], []
for epoch in range(num_epochs):
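    # The original loop body is not shown in this snippet; the lines below are a minimal
    # sketch of a standard PyTorch classification step (an assumption, not the source code),
    # reusing the model, criterion, optimizer, and bookkeeping lists defined above.
    for i, (inputs, labels) in enumerate(train_loader):
        outputs = model(inputs)            # forward pass
        loss = criterion(outputs, labels)  # cross-entropy loss

        optimizer.zero_grad()              # backpropagation and parameter update
        loss.backward()
        optimizer.step()

        loss_list.append(loss.item())
        batch_list.append(i)
        epoch_list.append(epoch)

    print('Epoch [{}/{}] finished, last batch loss: {:.4f}'.format(
        epoch + 1, num_epochs, loss.item()))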
def train(logdir_train1, logdir_train2, logdir_train3):
    # Load model Net1 for evaluation
    net1_model = Net1(hp.default.phns_len)
    checkpoint_path1 = '{}/checkpoint.tar'.format(logdir_train1)
    checkpoint1 = torch.load(checkpoint_path1)
    if checkpoint1:
        net1_model.load_state_dict(checkpoint1['model_state_dict'])

    # Load model Net2 for evaluation
    net2_model = Net2()
    checkpoint_path2 = '{}/checkpoint.tar'.format(logdir_train2)
    checkpoint2 = torch.load(checkpoint_path2)
    if checkpoint2:
        net2_model.load_state_dict(checkpoint2['model_state_dict'])

    # Load model Net3 for training
    net3_model = Net3()
    optimizer = torch.optim.Adam(net3_model.parameters(), lr=hp.train3.lr)
    checkpoint_path3 = '{}/checkpoint.tar'.format(logdir_train3)
    checkpoint3 = None
    if os.path.exists(checkpoint_path3):
        checkpoint3 = torch.load(checkpoint_path3)

    epoch = 0
    loss = 100.0
    lr = hp.train3.lr
    data_dir = hp.train3.data_path

    if checkpoint3:
        logger.info("Reuse existing train_list, eval_list from {}".format(logdir_train3))
        net3_model.load_state_dict(checkpoint3['model_state_dict'])
        optimizer.load_state_dict(checkpoint3['optimizer_state_dict'])
        lr = optimizer.param_groups[0]['lr']
        epoch = checkpoint3['epoch']
        loss = checkpoint3['loss']
    else:
        logger.info("Generate new train_list, eval_list, test_list.")

    net3_model.train()  # Set to train mode

    # Create train/valid loader
    if hp.train3.multi_speaker:
        training_set = Net3DataDir(os.path.join(data_dir, 'train', '*'),
                                   hp.train3.multi_speaker, k=300)
        validation_set = Net3DataDir(os.path.join(data_dir, 'eval'),
                                     hp.train3.multi_speaker, k=40)
    else:
        training_set = Net3DataDir(os.path.join(data_dir, 'train'))
        validation_set = Net3DataDir(os.path.join(data_dir, 'eval'))

    training_loader = DataLoader(training_set,
                                 batch_size=hp.train3.batch_size,
                                 shuffle=True,
                                 drop_last=True,
                                 num_workers=hp.train3.num_workers)
    logger.debug("Training loader created. Size: {} samples".format(training_set.size))

    validation_loader = DataLoader(validation_set,
                                   batch_size=hp.train3.batch_size,
                                   shuffle=True,
                                   drop_last=True,
                                   num_workers=hp.eval3.num_workers)
    logger.debug("Validation loader created. Size: {}".format(validation_set.size))

    # Create criterion
    criterion = MyMSELoss()
    logger.debug("Loss type: MSE loss on linear and mel-spectrogram")

    # Run model
    net3_model, _, _ = net3_train(checkpoint_path3, net1_model, net2_model, net3_model,
                                  training_loader, validation_loader, criterion,
                                  starting_epoch=epoch, device=hp.train3.device,
                                  lr=lr, loss=loss)