def evaluate(model, input, target, stats_dir, probs_dir, iteration): """Evaluate a model. Args: model: object output: 2d array, (samples_num, classes_num) target: 2d array, (samples_num, classes_num) stats_dir: str, directory to write out statistics. probs_dir: str, directory to write out output (samples_num, classes_num) iteration: int Returns: None """ # Check if cuda cuda = next(model.parameters()).is_cuda utilities.create_folder(stats_dir) utilities.create_folder(probs_dir) # Predict presence probabilittarget callback_time = time.time() (clips_num, time_steps, freq_bins) = input.shape (input, target) = utilities.transform_data(input, target) output = forward_in_batch(model, input, batch_size=500, cuda=cuda) output = output.data.cpu().numpy() # (clips_num, classes_num) # Write out presence probabilities prob_path = os.path.join(probs_dir, "prob_{}_iters.p".format(iteration)) cPickle.dump(output, open(prob_path, 'wb')) # Calculate statistics stats = utilities.calculate_stats(output, target) # Write out statistics stat_path = os.path.join(stats_dir, "stat_{}_iters.p".format(iteration)) cPickle.dump(stats, open(stat_path, 'wb')) mAP = np.mean([stat['AP'] for stat in stats]) mAUC = np.mean([stat['auc'] for stat in stats]) logging.info("mAP: {:.6f}, AUC: {:.6f}, Callback time: {:.3f} s".format( mAP, mAUC, time.time() - callback_time)) if False: logging.info("Saveing prob to {}".format(prob_path)) logging.info("Saveing stat to {}".format(stat_path))
def train(args): """Train a model. """ data_dir = args.data_dir workspace = args.workspace mini_data = args.mini_data balance_type = args.balance_type learning_rate = args.learning_rate filename = args.filename model_type = args.model_type model = args.model batch_size = args.batch_size # Path of hdf5 data bal_train_hdf5_path = os.path.join(data_dir, "bal_train.h5") unbal_train_hdf5_path = os.path.join(data_dir, "unbal_train.h5") test_hdf5_path = os.path.join(data_dir, "eval.h5") # Load data load_time = time.time() if mini_data: # Only load balanced data (bal_train_x, bal_train_y, bal_train_id_list) = utilities.load_data(bal_train_hdf5_path) train_x = bal_train_x train_y = bal_train_y train_id_list = bal_train_id_list else: # Load both balanced and unbalanced data (bal_train_x, bal_train_y, bal_train_id_list) = utilities.load_data(bal_train_hdf5_path) (unbal_train_x, unbal_train_y, unbal_train_id_list) = utilities.load_data(unbal_train_hdf5_path) train_x = np.concatenate((bal_train_x, unbal_train_x)) train_y = np.concatenate((bal_train_y, unbal_train_y)) train_id_list = bal_train_id_list + unbal_train_id_list # Test data (test_x, test_y, test_id_list) = utilities.load_data(test_hdf5_path) logging.info("Loading data time: {:.3f} s".format(time.time() - load_time)) logging.info("Training data shape: {}".format(train_x.shape)) # Optimization method optimizer = Adam(lr=learning_rate) model.compile(loss='binary_crossentropy', optimizer=optimizer) # Output directories sub_dir = os.path.join(filename, 'balance_type={}'.format(balance_type), 'model_type={}'.format(model_type)) models_dir = os.path.join(workspace, "models", sub_dir) utilities.create_folder(models_dir) stats_dir = os.path.join(workspace, "stats", sub_dir) utilities.create_folder(stats_dir) probs_dir = os.path.join(workspace, "probs", sub_dir) utilities.create_folder(probs_dir) # Data generator if balance_type == 'no_balance': DataGenerator = data_generator.VanillaDataGenerator elif balance_type == 'balance_in_batch': DataGenerator = data_generator.BalancedDataGenerator else: raise Exception("Incorrect balance_type!") train_gen = DataGenerator(x=train_x, y=train_y, batch_size=batch_size, shuffle=True, seed=1234) iteration = 0 call_freq = 1000 train_time = time.time() for (batch_x, batch_y) in train_gen.generate(): # Compute stats every several interations if iteration % call_freq == 0: logging.info("------------------") logging.info("Iteration: {}, train time: {:.3f} s".format( iteration, time.time() - train_time)) logging.info("Balance train statistics:") evaluate(model=model, input=bal_train_x, target=bal_train_y, stats_dir=os.path.join(stats_dir, 'bal_train'), probs_dir=os.path.join(probs_dir, 'bal_train'), iteration=iteration) logging.info("Test statistics:") evaluate(model=model, input=test_x, target=test_y, stats_dir=os.path.join(stats_dir, "test"), probs_dir=os.path.join(probs_dir, "test"), iteration=iteration) train_time = time.time() # Update params (batch_x, batch_y) = utilities.transform_data(batch_x, batch_y) model.train_on_batch(x=batch_x, y=batch_y) iteration += 1 # Save model save_out_path = os.path.join(models_dir, "md_{}_iters.h5".format(iteration)) model.save(save_out_path) # Stop training when maximum iteration achieves if iteration == 50001: break
def train(args): """Train a model. """ data_dir = args.data_dir workspace = args.workspace mini_data = args.mini_data balance_type = args.balance_type learning_rate = args.learning_rate filename = args.filename model_type = args.model_type model = args.model batch_size = args.batch_size cuda = True # Move model to gpu if cuda: model.cuda() # Path of hdf5 data bal_train_hdf5_path = os.path.join(data_dir, "bal_train.h5") unbal_train_hdf5_path = os.path.join(data_dir, "unbal_train.h5") test_hdf5_path = os.path.join(data_dir, "eval.h5") # Load data load_time = time.time() if mini_data: # Only load balanced data (bal_train_x, bal_train_y, bal_train_id_list) = utilities.load_data(bal_train_hdf5_path) train_x = bal_train_x train_y = bal_train_y train_id_list = bal_train_id_list else: # Load both balanced and unbalanced data (bal_train_x, bal_train_y, bal_train_id_list) = utilities.load_data(bal_train_hdf5_path) (unbal_train_x, unbal_train_y, unbal_train_id_list) = utilities.load_data(unbal_train_hdf5_path) train_x = np.concatenate((bal_train_x, unbal_train_x)) train_y = np.concatenate((bal_train_y, unbal_train_y)) train_id_list = bal_train_id_list + unbal_train_id_list # Test data (test_x, test_y, test_id_list) = utilities.load_data(test_hdf5_path) logging.info("Loading data time: {:.3f} s".format(time.time() - load_time)) logging.info("Training data shape: {}".format(train_x.shape)) # Optimization method optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-07) # Output directories sub_dir = os.path.join(filename, 'balance_type={}'.format(balance_type), 'model_type={}'.format(model_type)) models_dir = os.path.join(workspace, "models", sub_dir) utilities.create_folder(models_dir) stats_dir = os.path.join(workspace, "stats", sub_dir) utilities.create_folder(stats_dir) probs_dir = os.path.join(workspace, "probs", sub_dir) utilities.create_folder(probs_dir) # Data generator if balance_type == 'no_balance': DataGenerator = data_generator.VanillaDataGenerator elif balance_type == 'balance_in_batch': DataGenerator = data_generator.BalancedDataGenerator else: raise Exception("Incorrect balance_type!") train_gen = DataGenerator(x=train_x, y=train_y, batch_size=batch_size, shuffle=True, seed=1234) iteration = 0 call_freq = 1000 train_time = time.time() for (batch_x, batch_y) in train_gen.generate(): # Compute stats every several interations if iteration % call_freq == 0 and iteration > 1: logging.info("------------------") logging.info("Iteration: {}, train time: {:.3f} s".format( iteration, time.time() - train_time)) logging.info("Balance train statistics:") evaluate(model=model, input=bal_train_x, target=bal_train_y, stats_dir=os.path.join(stats_dir, 'bal_train'), probs_dir=os.path.join(probs_dir, 'bal_train'), iteration=iteration) logging.info("Test statistics:") evaluate(model=model, input=test_x, target=test_y, stats_dir=os.path.join(stats_dir, "test"), probs_dir=os.path.join(probs_dir, "test"), iteration=iteration) train_time = time.time() (batch_x, batch_y) = utilities.transform_data(batch_x, batch_y) batch_x = move_data_to_gpu(batch_x, cuda) batch_y = move_data_to_gpu(batch_y, cuda) # Forward. model.train() output = model(batch_x) # Loss. loss = F.binary_cross_entropy(output, batch_y) # Backward. optimizer.zero_grad() loss.backward() optimizer.step() iteration += 1 # Save model. 
        if iteration % 5000 == 0:
            save_out_dict = {
                'iteration': iteration,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            save_out_path = os.path.join(
                models_dir, "md_{}_iters.tar".format(iteration))
            torch.save(save_out_dict, save_out_path)
            logging.info("Saved model to {}".format(save_out_path))

        # Stop training when the maximum number of iterations is reached
        if iteration == 20001:
            break
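# `move_data_to_gpu` is called in the training loop above but not defined in
# this section. A minimal sketch, under the assumption that it converts a numpy
# array to a torch tensor and moves it to the GPU when `cuda` is True:
def move_data_to_gpu_sketch(x, cuda):
    x = torch.Tensor(x)
    if cuda:
        x = x.cuda()
    return x


# Usage sketch: a checkpoint dict written by the loop above can be restored
# like this before resuming training or running evaluation.
def load_checkpoint_sketch(checkpoint_path, model, optimizer):
    """Restore model/optimizer state saved by `train` and return the iteration."""
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['iteration']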
def evaluate(model, input, target, stats_dir, probs_dir, iteration): """Evaluate a model. Args: model: object output: 2d array, (samples_num, classes_num) target: 2d array, (samples_num, classes_num) stats_dir: str, directory to write out statistics. probs_dir: str, directory to write out output (samples_num, classes_num) iteration: int Returns: None """ # Check if cuda cuda = True #cuda = next(model.parameters()).is_cuda utilities.create_folder(stats_dir) utilities.create_folder(probs_dir) # Predict presence probabilittarget callback_time = time.time() (clips_num, time_steps, freq_bins) = input.shape (input, target) = utilities.transform_data(input, target) output, cla, norm_att, mult = forward_in_batch(model, input, batch_size=350, cuda=cuda) output = output.data.cpu().numpy() # (clips_num, classes_num) single = 1 if single == 1: print("output_all cat: ", output.shape) print("cla_all cat: ", cla.shape) print("cla_all cat: ", cla) print("mult_all cat: ", mult) print("norm_att_all cat: ", norm_att) cla = cla.data.cpu().numpy() norm_att = norm_att.data.cpu().numpy() mult = mult.data.cpu().numpy() #for multy multy = 0 if multy == 1: cla = cla.data.cpu().numpy() norm_att = norm_att.data.cpu().numpy() mult = mult.data.cpu().numpy() cla2 = cla2.data.cpu().numpy() norm_att2 = norm_att2.data.cpu().numpy() mult2 = mult2.data.cpu().numpy() print("cla_all cat: ", cla) print("mult_all cat: ", mult) print("norm_att_all cat: ", norm_att) print("cla_all cat: ", cla2) print("mult_all cat: ", mult2) print("norm_att_all cat: ", norm_att2) avg = 0 if avg == 1: print("output_all cat: ", output.shape) print("b2: ", b2.shape) b2 = b2.data.cpu().numpy() ''' # Write out presence probabilities prob_path = os.path.join(probs_dir, "prob_{}_iters.p".format(iteration)) cPickle.dump(output, open(prob_path, 'wb')) # Calculate statistics stats = utilities.calculate_stats(output, target) # Write out statistics stat_path = os.path.join(stats_dir, "stat_{}_iters.p".format(iteration)) cPickle.dump(stats, open(stat_path, 'wb')) mAP = np.mean([stat['AP'] for stat in stats]) mAUC = np.mean([stat['auc'] for stat in stats]) logging.info( "mAP: {:.6f}, AUC: {:.6f}, Callback time: {:.3f} s".format( mAP, mAUC, time.time() - callback_time)) if False: logging.info("Saveing prob to {}".format(prob_path)) logging.info("Saveing stat to {}".format(stat_path)) ''' #Save totest = 0 if totest == 0: #SAVE MODEL dataset = {} dataset['output'] = output if single == 1: dataset['cla'] = cla dataset['norm_att'] = norm_att dataset['mult'] = mult if multy == 1: dataset['cla'] = cla dataset['norm_att'] = norm_att dataset['mult'] = mult dataset['cla2'] = cla2 dataset['norm_att2'] = norm_att2 dataset['mult2'] = mult2 if avg == 1: dataset['b2'] = b2 path = r'C:\Users\AdexGomez\Downloads\Master\third_semester\Deep_Learning\Project\02456_project_audioset_attention\data' file_name = '\multyt.h5' file = path + file_name print(file) f = h5py.File(file, 'w') for k in dataset.keys(): print('\n ' + k + ' type=', type(dataset[k][0])) if k != 'event_label' and k != 'filename': # @@@@temporal, need to be fixed!!! print(' ... saving ' + k + '...') f.create_dataset(k, data=dataset[k])