def train(args):
    '''Train. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: '1' | '2' | '3' | '4' | 'none', set to none if using all
          data without validation to train
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    # Only use CUDA when both requested and actually available.
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = None  # Number of audio recordings to validate
    reduce_lr = True  # Reduce learning rate after several iterations

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(
        workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
            frames_per_second, mel_bins))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
            frames_per_second, mel_bins),
        'scalar.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
            frames_per_second, mel_bins),
        model_type, 'holdout_fold={}'.format(holdout_fold))
    create_folder(checkpoints_dir)

    # All folds result should write to the same directory
    temp_submissions_dir = os.path.join(
        workspace, '_temp', 'submissions', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
            frames_per_second, mel_bins),
        model_type)
    create_folder(temp_submissions_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
            frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type, 'dev',
            frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() maps the model-type string onto a class that is
    # presumably defined in this module's imports. It executes arbitrary
    # Python, so model_type must only ever come from trusted CLI input.
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0., amsgrad=True)

    # Data generator
    data_generator = DataGenerator(features_dir=features_dir, scalar=scalar,
        batch_size=batch_size, holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(model=model, data_generator=data_generator,
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 200 iterations (including iteration 0)
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            '''
            # Uncomment for evaluating on training dataset
            train_statistics = evaluator.evaluate(
                data_type='train',
                metadata_dir=metadata_dir,
                submissions_dir=temp_submissions_dir,
                max_validate_num=max_validate_num)
            '''

            # 'none' means training on all data, so there is no held-out
            # fold to validate against.
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    metadata_dir=metadata_dir,
                    submissions_dir=temp_submissions_dir,
                    max_validate_num=max_validate_num)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 1000 iterations (skipping iteration 0)
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Exponentially decay learning rate: x0.9 every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Forward pass in training mode
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning after a fixed number of iterations
        if iteration == 5000:
            break

        iteration += 1
def inference_validation(args):
    '''Inference and calculate metrics on validation data.

    Loads the model checkpoint saved at `iteration`, evaluates it on the
    validation split and logs metrics (optionally visualizing log-mel
    spectrograms).

    Args:
      dataset_dir: string, directory of dataset
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int, which saved checkpoint to load
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool

    Raises:
      ValueError: if subtask is not one of 'a', 'b', 'c'.
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    sources = get_sources(subtask)
    # One label is out-of-domain ('unknown'), hence the -1.
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
        'fold1_train.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
        'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(sub_dir))

    checkpoint_path = os.path.join(workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(review): eval() on the model-type string executes arbitrary
    # Python; model_type must only ever come from trusted CLI input.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy
    else:
        # Previously an unknown subtask fell through and crashed later with
        # a NameError; fail fast with a clear message instead.
        raise ValueError('Incorrect subtask: {}'.format(subtask))

    # FIX: restore the trained weights. These two lines were commented out,
    # so the model was being evaluated with its random initialization even
    # though checkpoint_path was computed from the requested iteration.
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        subtask=subtask,
        cuda=cuda)

    # Subtask b has recordings from three devices ('a', 'b', 'c'); evaluate
    # each source separately. Subtasks a and c only have device 'a'.
    if subtask in ['a', 'c']:
        evaluator.evaluate(data_type='validate', source='a', verbose=True)
    elif subtask == 'b':
        evaluator.evaluate(data_type='validate', source='a', verbose=True)
        evaluator.evaluate(data_type='validate', source='b', verbose=True)
        evaluator.evaluate(data_type='validate', source='c', verbose=True)

    # Visualize log mel spectrogram
    if visualize:
        evaluator.visualize(data_type='validate', source='a')
# NOTE(review): the four statements below are the tail of a feature-writing
# function whose `def` line lies before this chunk; `hf`, `folds`,
# `hdf5_path` and `write_hdf5_time` are presumably defined there — confirm
# their indentation level against the full file.
hf.create_dataset(name='fold', data=folds, dtype=np.int32)
hf.close()
logging.info('Write out hdf5 file to {}'.format(hdf5_path))
logging.info('Time spent: {} s'.format(time.time() - write_hdf5_time))


if __name__ == '__main__':
    # Command-line front end: the 'logmel' sub-command runs log-mel feature
    # extraction; anything else is rejected.
    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    parser_logmel = subparsers.add_parser('logmel')
    parser_logmel.add_argument('--workspace', type=str, required=True)
    parser_logmel.add_argument('--scene_type', type=str, required=True)
    parser_logmel.add_argument('--snr', type=int, required=True)

    args = parser.parse_args()

    logs_dir = os.path.join(args.workspace, 'logs', get_filename(__file__))
    create_folder(logs_dir)
    # NOTE(review): this rebinds the name `logging` to whatever
    # create_logging() returns, shadowing the stdlib logging module from
    # this point on — presumably create_logging returns the module itself.
    logging = create_logging(logs_dir, filemode='w')
    logging.info(args)

    if args.mode == 'logmel':
        calculate_logmel_features(args)
    else:
        raise Exception('Incorrect arguments!')
def pack_waveforms_to_hdf5(args):
    """Pack waveforms to a single hdf5 file.

    Reads every audio clip listed in the metadata csv, resamples it to the
    configured sample rate, pads/truncates it to a fixed length and stores
    it (as int16) plus its target vector in one waveform hdf5 file; a
    second, lightweight hdf5 file holds only names/targets/indices for
    fast target lookup.

    Args:
      audios_dir: string, directory containing the audio clips
      csv_path: string, metadata csv listing audio_name and target
      waveform_hdf5_path: string, output path for packed waveforms
      target_hdf5_path: string, output path for the target-only index file
      mini_data: bool, set True to pack only the first few clips (debugging)
    """
    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveform_hdf5_path = args.waveform_hdf5_path
    target_hdf5_path = args.target_hdf5_path
    mini_data = args.mini_data

    audio_length = config.audio_length
    classes_num = config.classes_num
    sample_rate = config.sample_rate

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveform_hdf5_path += '.mini'
        target_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveform_hdf5_path))
    create_folder(os.path.dirname(target_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(
        prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0:mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveform_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=((audios_num,)), dtype='S20')
        hf.create_dataset('waveform', shape=((audios_num, audio_length)),
            dtype=np.int16)
        # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
        # it was a plain alias of the builtin bool, so this is a drop-in.
        hf.create_dataset('target', shape=((audios_num, classes_num)),
            dtype=bool)
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Read audio
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path, sr=sample_rate,
                    mono=True)
                audio = pad_or_truncate(audio, audio_length)

                # NOTE(review): 'S20' truncates names longer than 20 bytes;
                # presumably all clip names fit — verify against the csv.
                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(
                    n, audio_path))

        # Pack target to hdf5 (written while `hf` is still open because the
        # names/targets are copied straight out of the waveform file).
        hdf5_name = target_hdf5_path.split('/')[-1]
        with h5py.File(target_hdf5_path, 'w') as target_hf:
            target_hf.create_dataset('audio_name', data=hf['audio_name'][:],
                dtype='S20')
            target_hf.create_dataset('hdf5_name',
                data=[hdf5_name.encode()] * audios_num, dtype='S40')
            target_hf.create_dataset('index_in_hdf5',
                data=np.arange(audios_num), dtype=np.int32)
            # FIX: same np.bool removal as above.
            target_hf.create_dataset('target', data=hf['target'][:],
                dtype=bool)

    logging.info('Write to {}'.format(waveform_hdf5_path))
    logging.info('Write to {}'.format(target_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
def train(args):
    """Train (optionally from a pretrained checkpoint), evaluating and
    checkpointing periodically until `stop_iteration`.

    Args (from argparse):
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      holdout_fold, model_type, loss_type, augmentation: strings selecting
          the experiment configuration
      pretrained_checkpoint_path: string or empty; when set, weights are
          initialized from this checkpoint
      freeze_base: bool, passed through to path naming (freezing presumably
          happens inside the model's load_from_pretrain)
      learning_rate: float; batch_size, few_shots, random_seed,
          resume_iteration, stop_iteration: ints
      cuda: bool; mini_data: bool, set True for debugging on a small subset
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base = args.freeze_base
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    few_shots = args.few_shots
    random_seed = args.random_seed
    resume_iteration = args.resume_iteration
    stop_iteration = args.stop_iteration
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    mini_data = args.mini_data
    filename = args.filename

    loss_func = get_loss_func(loss_type)
    pretrain = True if pretrained_checkpoint_path else False

    num_workers = 16

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(workspace, 'features',
        '{}training.waveform.h5'.format(prefix))

    # FIX: the originals read 'testing.waveform.h5'.format(prefix) /
    # 'evaluation.waveform.h5'.format(prefix) — format strings with no '{}'
    # placeholder — so the mini-data prefix was silently dropped and mini
    # runs evaluated against the full-size files. The train path above
    # shows the intended pattern.
    test_hdf5_path = os.path.join(workspace, 'features',
        '{}testing.waveform.h5'.format(prefix))

    evaluate_hdf5_path = os.path.join(workspace, 'features',
        '{}evaluation.waveform.h5'.format(prefix))

    test_reference_csv_path = os.path.join(dataset_dir, 'metadata',
        'groundtruth_strong_label_testing_set.csv')

    evaluate_reference_csv_path = os.path.join(dataset_dir, 'metadata',
        'groundtruth_strong_label_evaluation_set.csv')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size),
        'few_shots={}'.format(few_shots),
        'random_seed={}'.format(random_seed),
        'freeze_base={}'.format(freeze_base))
    create_folder(checkpoints_dir)

    tmp_submission_path = os.path.join(workspace, '_tmp_submission',
        '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size),
        'few_shots={}'.format(few_shots),
        'random_seed={}'.format(random_seed),
        'freeze_base={}'.format(freeze_base), '_submission.csv')
    create_folder(os.path.dirname(tmp_submission_path))

    statistics_path = os.path.join(workspace, 'statistics',
        '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size),
        'few_shots={}'.format(few_shots),
        'random_seed={}'.format(random_seed),
        'freeze_base={}'.format(freeze_base), 'statistics.pickle')
    create_folder(os.path.dirname(statistics_path))

    predictions_dir = os.path.join(workspace, 'predictions',
        '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'few_shots={}'.format(few_shots),
        'random_seed={}'.format(random_seed),
        'freeze_base={}'.format(freeze_base),
        'batch_size={}'.format(batch_size))
    create_folder(predictions_dir)

    logs_dir = os.path.join(workspace, 'logs', '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'few_shots={}'.format(few_shots),
        'random_seed={}'.format(random_seed),
        'freeze_base={}'.format(freeze_base),
        'batch_size={}'.format(batch_size))
    create_logging(logs_dir, 'w')
    logging.info(args)

    if 'cuda' in device:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model
    # NOTE(review): sample_rate, window_size, hop_size, mel_bins, fmin,
    # fmax, classes_num are not local — presumably module-level names from
    # a config import; confirm against the file header.
    Model = eval(model_type)
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax,
        classes_num)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    if pretrain:
        logging.info('Load pretrained model from {}'.format(
            pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)

    if resume_iteration:
        resume_checkpoint_path = os.path.join(checkpoints_dir,
            '{}_iterations.pth'.format(resume_iteration))
        logging.info('Load resume model from {}'.format(
            resume_checkpoint_path))
        resume_checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(resume_checkpoint['model'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = resume_checkpoint['iteration']
    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in device:
        model.to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
        betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)

    train_dataset = DCASE2017Task4Dataset(hdf5_path=train_hdf5_path)
    test_dataset = DCASE2017Task4Dataset(hdf5_path=test_hdf5_path)
    evaluate_dataset = DCASE2017Task4Dataset(hdf5_path=evaluate_hdf5_path)

    # Mixup combines pairs of examples, so it needs twice the batch.
    train_sampler = TrainSampler(
        hdf5_path=train_hdf5_path,
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size,
        few_shots=few_shots,
        random_seed=random_seed)

    test_sampler = EvaluateSampler(dataset_size=len(test_dataset),
        batch_size=batch_size)

    evaluate_sampler = EvaluateSampler(dataset_size=len(evaluate_dataset),
        batch_size=batch_size)

    collector = Collator()

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
        batch_sampler=train_sampler, collate_fn=collector,
        num_workers=num_workers, pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
        batch_sampler=test_sampler, collate_fn=collector,
        num_workers=num_workers, pin_memory=True)

    evaluate_loader = torch.utils.data.DataLoader(dataset=evaluate_dataset,
        batch_sampler=evaluate_sampler, collate_fn=collector,
        num_workers=num_workers, pin_memory=True)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)

    # Evaluator
    test_evaluator = Evaluator(model=model, generator=test_loader)
    evaluate_evaluator = Evaluator(model=model, generator=evaluate_loader)

    train_bgn_time = time.time()

    # Train on mini batches
    for batch_data_dict in train_loader:

        # Evaluate every 1000 iterations (skipped right after resuming so
        # the resume point isn't re-evaluated).
        if iteration % 1000 == 0:
            if resume_iteration > 0 and iteration == resume_iteration:
                pass
            else:
                logging.info('------------------------------------')
                logging.info('Iteration: {}'.format(iteration))

                train_fin_time = time.time()

                for (data_type, evaluator, reference_csv_path) in [
                        ('test', test_evaluator, test_reference_csv_path),
                        ('evaluate', evaluate_evaluator,
                         evaluate_reference_csv_path)]:
                    logging.info('{} statistics:'.format(data_type))
                    (statistics, predictions) = evaluator.evaluate(
                        reference_csv_path, tmp_submission_path)
                    statistics_container.append(data_type, iteration,
                        statistics)

                    prediction_path = os.path.join(predictions_dir,
                        '{}_iterations.prediction.{}.h5'.format(iteration,
                        data_type))
                    write_out_prediction(predictions, prediction_path)

                statistics_container.dump()

                train_time = train_fin_time - train_bgn_time
                validate_time = time.time() - train_fin_time

                logging.info(
                    'Train time: {:.3f} s, validate time: {:.3f} s'
                    ''.format(train_time, validate_time))

                train_bgn_time = time.time()

        # Save model every 10000 iterations once past 49999
        if iteration % 10000 == 0 and iteration > 49999:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                len(batch_data_dict['waveform']))

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                device)

        # Train
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                batch_data_dict['mixup_lambda'])
            batch_target_dict = {'target': do_mixup(batch_data_dict['target'],
                batch_data_dict['mixup_lambda'])}
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            batch_target_dict = {'target': batch_data_dict['target']}

        # loss
        loss = loss_func(batch_output_dict, batch_target_dict)
        print(iteration, loss)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == stop_iteration:
            break

        iteration += 1
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    # Accumulators for the periodic loss plot saved below.
    plt_x = []
    plt_y = []

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() maps the model-type string onto a class defined
    # elsewhere in this module; only safe with trusted CLI input.
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0., amsgrad=True)

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
        validate_hdf5_path=validate_hdf5_path, holdout_fold=holdout_fold,
        scalar=scalar, batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model, data_generator=data_generator,
        taxonomy_level=taxonomy_level, cuda=cuda, verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Best-so-far indicators; 'sum' tracks micro_auprc + micro_f1 +
    # macro_auprc, and best_map tracks mean average precision.
    best_inde = {}
    best_inde['micro_auprc'] = np.array([0.0])
    best_inde['micro_f1'] = np.array([0.0])
    best_inde['macro_auprc'] = np.array([0.0])
    best_inde['average_precision'] = np.array([0.0])
    best_inde['sum'] = best_inde['micro_auprc'] + best_inde[
        'micro_f1'] + best_inde['macro_auprc']
    best_map = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 200 iterations
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                max_iteration=None)

            # After a warm-up of 5000 iterations, snapshot the model
            # whenever mean average precision on the training data improves.
            if iteration > 5000:
                if best_map < np.mean(train_statistics['average_precision']):
                    best_map = np.mean(train_statistics['average_precision'])
                    logging.info('best_map= {}'.format(best_map))
                    # logging.info('iter= {}'.format(iteration))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics
                    }
                    checkpoint_path = os.path.join(checkpoints_dir,
                        'best2.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info(
                        'best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 1000 iterations
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate: x0.9 every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target',
                       'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Mixup features/spacetime with targets at the selected taxonomy
        # level; mixup_data is presumably defined elsewhere in this module.
        feature, spacetime, targets_a, targets_b, lam = mixup_data(
            batch_data_dict['feature'], batch_data_dict['spacetime'],
            batch_data_dict['{}_target'.format(taxonomy_level)],
            alpha=1.0, use_cuda=True)

        # Train
        model.train()
        criterion = nn.BCELoss().cuda()
        batch_output = model(feature, spacetime)

        # loss
        #batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = mixup_criterion(criterion, batch_output, targets_a, targets_b,
            lam)
        #loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Record loss every 100 iterations for the plot below.
        if iteration % 100 == 0:
            plt_x.append(iteration)
            plt_y.append(loss.item())

        # NOTE(review): hard-coded, user-specific output directory — the
        # plot save will fail on any other machine; should be a parameter.
        if iteration % 10000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('test result ', fontsize='18')
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            plt.savefig(
                '/home/fangjunyan/count/' +
                time.strftime('%Y%m%d_%H%M%S',
                    time.localtime(time.time())) +
                '{}'.format(holdout_fold) +
                '{}.jpg'.format(taxonomy_level))

        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development' | 'evaluation'
      holdout_fold: '1' | 'none', set 1 for development and none for training
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    sources_to_evaluate = get_sources(subtask)
    # One label is out-of-domain ('unknown'), hence the -1.
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
        'fold1_train.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
        'fold1_evaluate.csv')

    # Four parallel feature stores: mono mix plus left/right/side channels.
    feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}.h5'.format(sub_dir))

    feature_hdf5_path_left = os.path.join(
        workspace, 'features_left',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}.h5'.format(sub_dir))

    feature_hdf5_path_right = os.path.join(
        workspace, 'features_right',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}.h5'.format(sub_dir))

    feature_hdf5_path_side = os.path.join(
        workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}.h5'.format(sub_dir))

    # Matching per-channel normalization scalars.
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}.h5'.format(sub_dir))

    scalar_path_left = os.path.join(
        workspace, 'scalars_left',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}.h5'.format(sub_dir))

    scalar_path_right = os.path.join(
        workspace, 'scalars_right',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}.h5'.format(sub_dir))

    scalar_path_side = os.path.join(
        workspace, 'scalars_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}.h5'.format(sub_dir))

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)
    scalar_left = load_scalar(scalar_path_left)
    scalar_right = load_scalar(scalar_path_right)
    scalar_side = load_scalar(scalar_path_side)

    # Model
    # NOTE(review): eval() maps the model-type string onto a class defined
    # elsewhere in this module; only safe with trusted CLI input.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0., amsgrad=True)

    # Data generator yields one batch dict per channel in lockstep.
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        feature_hdf5_path_left=feature_hdf5_path_left,
        feature_hdf5_path_right=feature_hdf5_path_right,
        feature_hdf5_path_side=feature_hdf5_path_side,
        train_csv=train_csv,
        validate_csv=validate_csv,
        scalar=scalar,
        scalar_left=scalar_left,
        scalar_right=scalar_right,
        scalar_side=scalar_side,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model, data_generator=data_generator,
        subtask=subtask, cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict, batch_data_dict_left, batch_data_dict_right, \
            batch_data_dict_side in data_generator.generate_train():

        # Evaluate every 200 iterations
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # NOTE(review): the training-set statistics returned here are
            # never stored; presumably evaluate() logs them internally.
            for source in sources_to_evaluate:
                train_statistics = evaluator.evaluate(data_type='train',
                    source=source, max_iteration=None, verbose=False)

            for source in sources_to_evaluate:
                validate_statistics = evaluator.evaluate(data_type='validate',
                    source=source, max_iteration=None, verbose=False)
                validate_statistics_container.append_and_dump(
                    iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 1000 iterations
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate: x0.9 every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move the tensors the model consumes to the GPU (per channel dict)
        for key in batch_data_dict.keys():
            if key in ['feature', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)
        for key in batch_data_dict_left.keys():
            if key in ['feature_left', 'target']:
                batch_data_dict_left[key] = move_data_to_gpu(
                    batch_data_dict_left[key], cuda)
        for key in batch_data_dict_right.keys():
            if key in ['feature_right', 'target']:
                batch_data_dict_right[key] = move_data_to_gpu(
                    batch_data_dict_right[key], cuda)
        for key in batch_data_dict_side.keys():
            if key in ['feature_side', 'target']:
                batch_data_dict_side[key] = move_data_to_gpu(
                    batch_data_dict_side[key], cuda)

        # Train: forward all four channel views together
        model.train()
        batch_output = model(data=batch_data_dict['feature'],
            data_left=batch_data_dict_left['feature_left'],
            data_right=batch_data_dict_right['feature_right'],
            data_side=batch_data_dict_side['feature_side'])

        # loss
        loss = loss_func(batch_output, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 15000:
            break

        iteration += 1
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    # Buffers for the loss curve that is plotted/saved near the end of training.
    plt_x = []
    plt_y = []
    T_max = 300  # NOTE(review): appears unused in this function.
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True  # Decay the learning rate by 0.9 every 200 iterations.

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    def mixup_data(x1, x2, y, alpha=1.0, use_cuda=True):
        # Mixup data augmentation applied jointly to two input modalities.
        '''Returns mixed inputs, pairs of targets, and lambda'''
        if alpha > 0:
            lam = np.random.beta(alpha, alpha)  # Draw lam ~ Beta(alpha, alpha).
        else:
            lam = 1
        # batch_size = x1.size()[0]
        # NOTE(review): `batch_size` below is the outer-scope configured batch
        # size, not x1's actual first dimension.  After the zero-target
        # filtering in the training loop the actual batch can be smaller, so
        # randperm(batch_size) may produce out-of-range indices — confirm.
        if use_cuda:
            index = torch.randperm(
                batch_size).cuda()  # Random permutation of 0..batch_size-1.
        else:
            index = torch.randperm(batch_size)  # CPU variant.
        mixed_x1 = lam * x1 + (1 - lam) * x1[index, :]
        mixed_x2 = lam * x2 + (1 - lam) * x2[index, :]  # Mix both modalities with the same lam/index.
        y_a, y_b = y, y[index]
        return mixed_x1, mixed_x2, y_a, y_b, lam

    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        # Standard mixup loss: lam-weighted sum of losses against both targets.
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    # NOTE(review): the scalar (normalization statistics) file is named
    # 'train.h5' under 'scalars' — presumably computed from the train split.
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    # Directory where the loss-curve images are written.
    loss_path = os.path.join(
        workspace, 'loss',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(loss_path)

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)
    # Free-form experiment tag (runtime log string, left verbatim).
    logging.info(
        " Space_Duo_Cnn_9_Avg 多一层 258*258 不共用FC,必须带时空标签 用loss 监测,使用去零one hot "
    )

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)
    logging.info('model parm:{} '.format(
        sum(param.numel() for param in model.parameters())))  # Log the model parameter count.

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    # Best-indicator bookkeeping.  NOTE(review): except for initialization,
    # `best_inde` is never updated or read below (only `best_map` is used).
    best_inde = {}
    best_inde['micro_auprc'] = np.array([0.0])
    best_inde['micro_f1'] = np.array([0.0])
    best_inde['macro_auprc'] = np.array([0.0])
    best_inde['average_precision'] = np.array([0.0])
    best_inde['sum'] = best_inde['micro_auprc'] + best_inde[
        'micro_f1'] + best_inde['macro_auprc']
    last_loss1 = []  # NOTE(review): unused in this function.
    last_loss2 = []  # NOTE(review): unused in this function.
    last_loss = []  # Sliding window of the last 5 loss values (spike monitor).
    best_map = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)
            # After a warm-up period, checkpoint whenever train mAP improves.
            if iteration > 5000:
                if best_map < np.mean(train_statistics['average_precision']):
                    best_map = np.mean(train_statistics['average_precision'])
                    logging.info('best_map= {}'.format(best_map))
                    # logging.info('iter= {}'.format(iteration))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics
                    }
                    checkpoint_path = os.path.join(checkpoints_dir,
                                                   'best7.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info(
                        'best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Binarize targets at 0.6 and drop samples whose target vector is all
        # zero ("zero-filtered one-hot").  NOTE(review): `copy()` is a shallow
        # copy, so the in-place thresholding of `l[j]` below also mutates the
        # arrays shared with `batch_data_dict` — confirm this is intended.
        batch_data2_dict = batch_data_dict.copy()
        n = []  # Row indices to delete (samples with no active label).
        for i, l in enumerate(batch_data2_dict['coarse_target']):
            k = 0
            for j in range(0, 8):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
            if k == 8:
                if taxonomy_level == 'coarse':
                    n.append(i)
        for i, l in enumerate(batch_data2_dict['fine_target']):
            k = 0
            for j in range(0, 29):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
            if k == 29:
                if taxonomy_level == 'fine':
                    n.append(i)
        batch_data2_dict['fine_target'] = np.delete(
            batch_data2_dict['fine_target'], n, axis=0)
        batch_data2_dict['coarse_target'] = np.delete(
            batch_data2_dict['coarse_target'], n, axis=0)
        batch_data2_dict['audio_name'] = np.delete(
            batch_data2_dict['audio_name'], n, axis=0)
        batch_data2_dict['feature'] = np.delete(batch_data2_dict['feature'],
                                                n,
                                                axis=0)
        batch_data2_dict['spacetime'] = np.delete(
            batch_data2_dict['spacetime'], n, axis=0)
        # Skip the iteration entirely if every sample was filtered out.
        if batch_data2_dict['audio_name'].size == 0:
            iteration += 1
            continue
        # To train on the raw probability targets, comment out the next line;
        # keep it to train on the zero-filtered one-hot targets.
        batch_data_dict = batch_data2_dict
        # if iteration <8655:
        #     batch_data_dict = batch_data2_dict
        # elif iteration >=8655 and iteration % 2 == 0:
        #     batch_data_dict = batch_data2_dict

        # Move data to GPU ,'external_target','external_feature'
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        # Mixup data augmentation (applied independently for fine and coarse
        # heads).
        feature1, spacetime1, targets1_a, targets1_b, lam1 = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['fine_target'],
            alpha=1.0,
            use_cuda=True)
        feature2, spacetime2, targets2_a, targets2_b, lam2 = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['coarse_target'],
            alpha=1.0,
            use_cuda=True)
        batch_output1 = model.forward1(feature1, spacetime1)
        batch_output2 = model.forward2(feature2, spacetime2)
        # NOTE(review): int() truncates lam in (0, 1) to 0, so each loss below
        # collapses to the second (permuted-target) term only — this defeats
        # the lam-weighted mixup loss; confirm whether truncation is intended.
        lam1 = int(lam1)
        lam2 = int(lam2)
        loss1 = (lam1 * binary_cross_entropy(batch_output1, targets1_a) +
                 (1 - lam1) * binary_cross_entropy(batch_output1, targets1_b))
        loss2 = (lam2 * binary_cross_entropy(batch_output2, targets2_a) +
                 (1 - lam2) * binary_cross_entropy(batch_output2, targets2_b))
        # To train without mixup augmentation, use the code below instead:
        # batch_target1 = batch_data_dict['fine_target']
        # batch_output1 = model.forward1(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # batch_target2 = batch_data_dict['coarse_target']
        # batch_output2 = model.forward2(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # loss1 = binary_cross_entropy(batch_output1, batch_target1)
        # loss2 = binary_cross_entropy(batch_output2, batch_target2)
        loss = loss1 + loss2

        # Loss-spike monitoring over a 5-value sliding window: a large jump
        # relative to the recent total variation `cha` either skips the update
        # ("drop") or halves the loss ("low weight").  Comment this block out
        # to disable monitoring.
        if iteration > 4320:
            new_loss = loss.item()
            if len(last_loss) < 5:
                last_loss.append(new_loss)
            else:
                cha = 0  # Total variation of the window.
                for i in range(4):
                    cha += abs(last_loss[i + 1] - last_loss[i])
                if new_loss > last_loss[4] and cha >= (new_loss -
                                                       last_loss[4]) > cha / 2:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info(' drop iteration:{}'.format(iteration))
                    iteration += 1
                    continue
                elif new_loss > last_loss[4] and (new_loss -
                                                  last_loss[4]) > cha / 2.75:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info(' low weightiteration:{}'.format(iteration))
                    loss = loss / 2
                else:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
        #
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if iteration % 50 == 0:
            plt_x.append(iteration)
            # NOTE(review): this appends the loss *tensor* (with its autograd
            # graph), not loss.item(), which retains graphs in memory and is
            # later passed to plt.plot — confirm .item() is not needed.
            plt_y.append(loss)
        if iteration % 13000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('test result ', fontsize='18')
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            # Save a timestamped copy and an always-latest copy.
            plt.savefig(loss_path + '/' +
                        time.strftime('%m%d_%H%M%S',
                                      time.localtime(time.time())) +
                        'loss.jpg')
            plt.savefig(loss_path + '/loss.jpg')

        # Stop learning
        if iteration == 13000:
            # logging.info("best_micro_auprc:{:.3f}".format(best_inde['micro_auprc']))
            # logging.info("best_micro_f1:{:.3f}".format(best_inde['micro_f1']))
            # logging.info("best_macro_auprc:{:.3f}".format(best_inde['macro_auprc']))
            # labels = get_labels(taxonomy_level)
            # for k, label in enumerate(labels):
            #     logging.info('    {:<40}{:.3f}'.format(label, best_inde['average_precision'][k]))
            break

        iteration += 1
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_sources: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      holdout_fold: '1', '2', '3', '4' | 'none', set `none` for training on
          all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    max_iteration = 500  # Number of mini-batches to evaluate on training data
    reduce_lr = False  # Learning-rate decay is disabled in this variant.

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    # NOTE(review): the normalization scalar is read from the *noisy* split
    # ('train_noisy.h5' under 'scalars') regardless of train_source — confirm.
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(segment_seconds, hop_seconds,
                                                 pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluate on partial of train data.  NOTE(review): the returned
            # statistics are not stored — evaluation here is for logging only.
            logging.info('Train statistics:')
            for target_source in ['curated', 'noisy']:
                validate_curated_statistics = evaluator.evaluate(
                    data_type='train',
                    target_source=target_source,
                    max_iteration=max_iteration,
                    verbose=False)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')
                for target_source in ['curated', 'noisy']:
                    validate_curated_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)
                    validate_statistics_container.append(
                        iteration, target_source, validate_curated_statistics)
                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate (disabled here because reduce_lr is False).
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss
        loss = binary_cross_entropy(batch_output, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 20000:
            break

        iteration += 1
def pack_maps_dataset_to_hdf5(args):
    """Pack each MAPS piano recording into its own hdf5 file.

    MAPS is a piano dataset only used for evaluating our piano transcription
    system (optional).

    Ref: [1] Emiya, Valentin. "MAPS Database A piano database for multipitch
    estimation and automatic transcription of music. 2016

    Audio is loaded and resampled with librosa, the paired midi file is
    parsed, and both are written to one hdf5 file per recording.

    Args:
      dataset_dir: str, directory of dataset
      workspace: str, directory of your workspace
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    sample_rate = config.sample_rate

    # Only these two real-piano subsets are packed.
    pianos = ['ENSTDkCl', 'ENSTDkAm']

    # Paths
    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maps')

    logs_dir = os.path.join(workspace, 'logs', get_filename(__file__))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    start_time = time.time()
    num_packed = 0

    # Load & resample each audio file to a hdf5 file
    for piano in pianos:
        mus_dir = os.path.join(dataset_dir, piano, 'MUS')

        # Every recording is identified by its .mid file; keep the stem only.
        stems = []
        for entry in os.listdir(mus_dir):
            stem, ext = os.path.splitext(entry)
            if ext == '.mid':
                stems.append(stem)

        for stem in stems:
            print('{} {}'.format(num_packed, stem))

            base = os.path.join(mus_dir, stem)
            audio_path = '{}.wav'.format(base)
            midi_path = '{}.mid'.format(base)

            (audio, _) = librosa.core.load(audio_path,
                                           sr=sample_rate,
                                           mono=True)

            midi_dict = read_maps_midi(midi_path)

            packed_hdf5_path = os.path.join(waveform_hdf5s_dir,
                                            '{}.h5'.format(stem))
            create_folder(os.path.dirname(packed_hdf5_path))

            with h5py.File(packed_hdf5_path, 'w') as hf:
                # MAPS recordings always belong to the test split.
                hf.attrs.create('split', data='test'.encode(), dtype='S20')
                hf.attrs.create('midi_filename',
                                data='{}.mid'.format(stem).encode(),
                                dtype='S100')
                hf.attrs.create('audio_filename',
                                data='{}.wav'.format(stem).encode(),
                                dtype='S100')
                hf.create_dataset(
                    name='midi_event',
                    data=[e.encode() for e in midi_dict['midi_event']],
                    dtype='S100')
                hf.create_dataset(name='midi_event_time',
                                  data=midi_dict['midi_event_time'],
                                  dtype=np.float32)
                # Waveform is stored as int16 to halve the storage footprint.
                hf.create_dataset(name='waveform',
                                  data=float32_to_int16(audio),
                                  dtype=np.int16)

            num_packed += 1
            logging.info('Write hdf5 to {}'.format(packed_hdf5_path))

    logging.info('Time: {:.3f} s'.format(time.time() - start_time))
def train(args):
    """Train and evaluate.

    Args:
      dataset_dir: str
      workspace: str
      holdout_fold: '1'
      model_type: str, e.g., 'Cnn_9layers_Gru_FrameAtt'
      loss_type: str, e.g., 'clip_bce'
      augmentation: str, e.g., 'mixup'
      learning_rate: float
      batch_size: int
      resume_iteration: int
      stop_iteration: int
      device: 'cuda' | 'cpu'
      mini_data: bool
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    resume_iteration = args.resume_iteration
    stop_iteration = args.stop_iteration
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    mini_data = args.mini_data
    filename = args.filename

    loss_func = get_loss_func(loss_type)
    num_workers = 8

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(workspace, 'hdf5s',
                                   '{}training.h5'.format(prefix))

    test_hdf5_path = os.path.join(workspace, 'hdf5s',
                                  '{}testing.h5'.format(prefix))

    # The evaluation set has no mini-data variant, so no prefix is applied.
    evaluate_hdf5_path = os.path.join(workspace, 'hdf5s', 'evaluation.h5')

    test_reference_csv_path = os.path.join(
        dataset_dir, 'metadata', 'groundtruth_strong_label_testing_set.csv')

    evaluate_reference_csv_path = os.path.join(
        dataset_dir, 'metadata', 'groundtruth_strong_label_evaluation_set.csv')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold),
        'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    tmp_submission_path = os.path.join(
        workspace, '_tmp_submission', '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold),
        'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), '_submission.csv')
    create_folder(os.path.dirname(tmp_submission_path))

    statistics_path = os.path.join(
        workspace, 'statistics', '{}{}'.format(prefix, filename),
        'holdout_fold={}'.format(holdout_fold),
        'model_type={}'.format(model_type), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pickle')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', '{}{}'.format(prefix, filename),
                            'holdout_fold={}'.format(holdout_fold),
                            'model_type={}'.format(model_type),
                            'loss_type={}'.format(loss_type),
                            'augmentation={}'.format(augmentation),
                            'batch_size={}'.format(batch_size))
    create_logging(logs_dir, 'w')
    logging.info(args)

    if 'cuda' in device:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model
    assert model_type, 'Please specify model_type!'
    Model = eval(model_type)
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax,
                  classes_num)

    # Statistics.  Bug fix: the container must be created *before* the resume
    # branch below — previously it was created only after the data loaders,
    # so resuming raised a NameError on statistics_container.load_state_dict.
    statistics_container = StatisticsContainer(statistics_path)

    if resume_iteration:
        resume_checkpoint_path = os.path.join(
            checkpoints_dir, '{}_iterations.pth'.format(resume_iteration))
        logging.info(
            'Load resume model from {}'.format(resume_checkpoint_path))
        resume_checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(resume_checkpoint['model'])
        # Truncate the stored statistics to the resume point.
        statistics_container.load_state_dict(resume_iteration)
        iteration = resume_checkpoint['iteration']
    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in device:
        model.to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Dataset
    dataset = DCASE2017Task4Dataset()

    # Sampler.  With mixup each training batch is doubled so that pairs of
    # samples can be mixed together.
    train_sampler = TrainSampler(
        hdf5_path=train_hdf5_path,
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size)

    test_sampler = TestSampler(hdf5_path=test_hdf5_path,
                               batch_size=batch_size)

    evaluate_sampler = TestSampler(hdf5_path=evaluate_hdf5_path,
                                   batch_size=batch_size)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=collate_fn,
                                               num_workers=num_workers,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_sampler=test_sampler,
                                              collate_fn=collate_fn,
                                              num_workers=num_workers,
                                              pin_memory=True)

    evaluate_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=evaluate_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)

    # Evaluator
    evaluator = Evaluator(model=model)

    train_bgn_time = time.time()

    # Train on mini batches
    for batch_data_dict in train_loader:

        # Evaluate
        if (iteration % 1000 == 0
                and iteration > resume_iteration):  # or (iteration == 0):
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for (data_type, data_loader, reference_csv_path) in [
                ('test', test_loader, test_reference_csv_path),
                ('evaluate', evaluate_loader, evaluate_reference_csv_path)
            ]:
                # Calculate statistics
                (statistics, _) = evaluator.evaluate(data_loader,
                                                     reference_csv_path,
                                                     tmp_submission_path)

                logging.info('{} statistics:'.format(data_type))
                logging.info('    Clipwise mAP: {:.3f}'.format(
                    np.mean(statistics['clipwise_ap'])))
                logging.info('    Framewise mAP: {:.3f}'.format(
                    np.mean(statistics['framewise_ap'])))
                logging.info('    {}'.format(
                    statistics['sed_metrics']['overall']['error_rate']))

                statistics_container.append(data_type, iteration, statistics)

            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 10000 == 0:
            checkpoint = {
                'iteration': iteration,
                # .module: unwrap DataParallel so the checkpoint loads into a
                # bare model as well.
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                batch_size=len(batch_data_dict['waveform']))

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                                                       device)

        # Train
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                                      batch_data_dict['mixup_lambda'])
            batch_target_dict = {
                'target':
                do_mixup(batch_data_dict['target'],
                         batch_data_dict['mixup_lambda'])
            }
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            batch_target_dict = {'target': batch_data_dict['target']}

        # loss
        loss = loss_func(batch_output_dict, batch_target_dict)
        print(iteration, loss)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == stop_iteration:
            break

        iteration += 1
def pack_maestro_dataset_to_hdf5(args):
    """Load & resample MAESTRO audio files, then write to hdf5 files.

    One hdf5 file is written per recording, carrying the metadata row as
    attributes and the parsed midi events plus the int16 waveform as datasets.

    Args:
      dataset_dir: str, directory of dataset
      workspace: str, directory of your workspace
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    sample_rate = config.sample_rate

    # Paths
    csv_path = os.path.join(dataset_dir, 'maestro-v2.0.0.csv')
    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maestro')

    logs_dir = os.path.join(workspace, 'logs', get_filename(__file__))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Read meta dict
    meta_dict = read_metadata(csv_path)
    audios_num = len(meta_dict['canonical_composer'])
    logging.info('Total audios number: {}'.format(audios_num))

    start_time = time.time()

    # Load & resample each audio file to a hdf5 file
    for idx in range(audios_num):
        midi_filename = meta_dict['midi_filename'][idx]
        audio_filename = meta_dict['audio_filename'][idx]

        logging.info('{} {}'.format(idx, midi_filename))

        # Parse midi events
        midi_dict = read_midi(os.path.join(dataset_dir, midi_filename))

        # Load & resample audio
        (audio, _) = librosa.core.load(os.path.join(dataset_dir,
                                                    audio_filename),
                                       sr=sample_rate,
                                       mono=True)

        packed_hdf5_path = os.path.join(
            waveform_hdf5s_dir,
            '{}.h5'.format(os.path.splitext(audio_filename)[0]))
        create_folder(os.path.dirname(packed_hdf5_path))

        with h5py.File(packed_hdf5_path, 'w') as hf:
            # Metadata from the csv row, stored as fixed-length hdf5 strings.
            hf.attrs.create('canonical_composer',
                            data=meta_dict['canonical_composer'][idx].encode(),
                            dtype='S100')
            hf.attrs.create('canonical_title',
                            data=meta_dict['canonical_title'][idx].encode(),
                            dtype='S100')
            hf.attrs.create('split',
                            data=meta_dict['split'][idx].encode(),
                            dtype='S20')
            hf.attrs.create('year',
                            data=meta_dict['year'][idx].encode(),
                            dtype='S10')
            hf.attrs.create('midi_filename',
                            data=midi_filename.encode(),
                            dtype='S100')
            hf.attrs.create('audio_filename',
                            data=audio_filename.encode(),
                            dtype='S100')
            hf.attrs.create('duration',
                            data=meta_dict['duration'][idx],
                            dtype=np.float32)

            hf.create_dataset(
                name='midi_event',
                data=[e.encode() for e in midi_dict['midi_event']],
                dtype='S100')
            hf.create_dataset(name='midi_event_time',
                              data=midi_dict['midi_event_time'],
                              dtype=np.float32)
            # Waveform is stored as int16 to halve the storage footprint.
            hf.create_dataset(name='waveform',
                              data=float32_to_int16(audio),
                              dtype=np.int16)

        logging.info('Write hdf5 to {}'.format(packed_hdf5_path))

    logging.info('Time: {:.3f} s'.format(time.time() - start_time))
def train(args):
    """Evaluate a (optionally pretrained) model on the RAMAS test set and
    export per-clip emotion probabilities merged with the metadata csv.

    NOTE(review): despite its name, this function only runs evaluation /
    inference — it never performs a training step.

    Args (attributes of `args`):
      dataset_dir, workspace: str
      holdout_fold: str
      model_type: str, name of a model class resolved via eval()
      pretrained_checkpoint_path: str or empty — enables pretrained loading
      freeze_base, loss_type, augmentation, learning_rate, batch_size,
      resume_iteration, stop_iteration, cuda, filename: standard run options
      (several of these are read but not used below).
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base = args.freeze_base
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    resume_iteration = args.resume_iteration
    stop_iteration = args.stop_iteration
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    filename = args.filename
    num_workers = 8

    loss_func = get_loss_func(loss_type)  # NOTE(review): unused below.
    pretrain = True if pretrained_checkpoint_path else False

    # TODO: restore the path to the full preprocessed dataset.
    hdf5_path = os.path.join(workspace, 'features_ramas',
                             'waveform_meta_test.h5')
    # hdf5_path = os.path.join(workspace, 'features', 'waveform.h5')
    checkpoints_dir = os.path.join(workspace, 'checkpoints')
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(workspace, 'statistics', filename,
                                   'holdout_fold={}'.format(holdout_fold),
                                   model_type, 'pretrain={}'.format(pretrain),
                                   'loss_type={}'.format(loss_type),
                                   'augmentation={}'.format(augmentation),
                                   'batch_size={}'.format(batch_size),
                                   'freeze_base={}'.format(freeze_base),
                                   'statistics.pickle')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename,
                            'holdout_fold={}'.format(holdout_fold), model_type,
                            'pretrain={}'.format(pretrain),
                            'loss_type={}'.format(loss_type),
                            'augmentation={}'.format(augmentation),
                            'batch_size={}'.format(batch_size),
                            'freeze_base={}'.format(freeze_base))
    create_logging(logs_dir, 'w')
    logging.info(args)

    if 'cuda' in device:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model.  NOTE(review): sample_rate, window_size, hop_size, mel_bins,
    # fmin, fmax are not defined in this function — presumably module-level
    # globals; confirm.
    Model = eval(model_type)
    # TODO: classes_num (4) is hard-coded here — not good.
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, 4,
                  freeze_base)

    # Statistics.  NOTE(review): the container is created but never appended
    # to or dumped below.
    statistics_container = StatisticsContainer(statistics_path)

    if pretrain:
        logging.info(
            'Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    dataset = GtzanDataset()

    # batch_size=1 so each clip is evaluated individually.
    validate_sampler = EvaluateSampler(hdf5_path=hdf5_path,
                                       holdout_fold=holdout_fold,
                                       batch_size=1)

    validate_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_sampler=validate_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)

    if 'cuda' in device:
        model.to(device)

    # Evaluator
    evaluator = Evaluator(model=model)

    # Fixed seed for reproducible evaluation.
    torch.manual_seed(729720439)
    statistics, output_dict = evaluator.evaluate(validate_loader)
    logging.info('Validate precision: {:.3f}'.format(statistics['precision']))
    logging.info('Validate recall: {:.3f}'.format(statistics['recall']))
    logging.info('Validate f_score: {:.3f}'.format(statistics['f_score']))
    logging.info('\n' + str(statistics['cm']))

    # Merge per-clip predictions with the metadata csv and export.
    # NOTE(review): hard-coded absolute paths — not portable.
    df_audio = pd.read_csv(
        '/home/den/DATASETS/AUDIO/preprocessed/ramas/meta_test.csv')
    # Keep only the four target emotion classes.
    df_audio = df_audio[df_audio['cur_label'].isin(
        ['ang', 'hap', 'sad', 'neu'])]
    temp_df = pd.DataFrame(columns=['cur_name', 'hap', 'ang', 'sad', 'neu'])
    temp_df['cur_name'] = output_dict['audio_name']
    # NOTE(review): assumes clipwise_output2 columns are ordered
    # [hap, ang, sad, neu] — confirm against the model's class ordering.
    temp_df.loc[:, ['hap', 'ang', 'sad', 'neu']] = np.vstack(
        output_dict['clipwise_output2'])
    merge_df = pd.merge(df_audio, temp_df, on='cur_name', how='inner')
    merge_df.to_csv(
        '/home/den/Documents/diploma/panns/panns_ramas_inference.csv',
        index=False)
def train(args):
    """Fine-tune a pretrained audio-tagging model on AudioSet-format indexes.

    Args (read from ``args``):
        window_size, hop_size, mel_bins, fmin, fmax: spectrogram parameters.
        model_type: name of a model class available in this module's scope.
        pretrained_checkpoint_path: optional checkpoint to warm-start from
            (empty/None disables pretraining).
        freeze_base: requested freeze flag (currently forced to True below).
        cuda: use GPU when available.
        workspace_input: directory containing 'hdf5s/indexes/*.h5' and
            (for resume) previous checkpoints.
        workspace_output: directory where checkpoints, statistics and logs
            are written.
        filename: sub-directory name used inside the output paths.

    Side effects: creates folders, configures logging, writes checkpoints
    and statistics, and trains until ``early_stop`` iterations.
    """
    # Arguments & parameters
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    # NOTE(review): the command-line value is deliberately overridden —
    # the base network is always frozen. TODO: honor args.freeze_base.
    freeze_base = True
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'

    sample_rate = config.sample_rate
    classes_num = config.classes_num
    pretrain = bool(pretrained_checkpoint_path)

    # Hard-coded training configuration (copied from main.py).
    workspace_input = args.workspace_input
    workspace_output = args.workspace_output
    data_type = 'balanced_train'
    loss_type = 'clip_bce'
    balanced = 'balanced'
    augmentation = 'none'
    batch_size = 1
    learning_rate = 1e-3
    resume_iteration = 0
    early_stop = 100000
    filename = args.filename
    num_workers = 8

    clip_samples = config.clip_samples
    loss_func = get_loss_func(loss_type)
    black_list_csv = 'metadata/black_list/groundtruth_weak_label_evaluation_set.csv'

    # Index files. Built with os.path.join (the original concatenated
    # workspace_input + 'hdf5s/...' directly, which breaks unless the
    # workspace path happens to end with a separator).
    train_indexes_hdf5_path = os.path.join(
        workspace_input, 'hdf5s', 'indexes', '{}.h5'.format(data_type))
    eval_bal_indexes_hdf5_path = os.path.join(
        workspace_input, 'hdf5s', 'indexes', 'balanced_train.h5')
    eval_test_indexes_hdf5_path = os.path.join(
        workspace_input, 'hdf5s', 'indexes', 'eval.h5')

    # Output paths encode the full hyper-parameter configuration.
    checkpoints_dir = os.path.join(
        workspace_output, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(
        workspace_output, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(
        workspace_output, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if device == 'cuda':
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU.')

    # Model. Built exactly once: the original constructed a first model,
    # loaded the pretrained weights into it, then constructed a second
    # identical model, silently discarding the pretrained weights. Here the
    # checkpoint is loaded into the one instance that is actually trained.
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size,
                  hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax,
                  classes_num=classes_num, freeze_base=freeze_base)

    params_num = count_parameters(model)
    logging.info('Parameters num: {}'.format(params_num))

    if pretrain:
        logging.info(
            'Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)
        print('Load pretrained model successfully!')

    # Dataset will be used by DataLoader later. Dataset takes a meta as
    # input and returns a waveform and a target.
    dataset = AudioSetDataset(clip_samples=clip_samples,
                              classes_num=classes_num)

    # Train sampler
    (train_sampler, train_collector) = get_train_sampler(
        balanced, augmentation, train_indexes_hdf5_path, black_list_csv,
        batch_size)

    # Evaluate samplers
    eval_bal_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_bal_indexes_hdf5_path, batch_size=batch_size)
    eval_test_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_test_indexes_hdf5_path, batch_size=batch_size)
    eval_collector = Collator(mixup_alpha=None)

    # Data loaders
    train_loader = torch.utils.data.DataLoader(
        dataset=dataset, batch_sampler=train_sampler,
        collate_fn=train_collector, num_workers=num_workers, pin_memory=True)
    eval_bal_loader = torch.utils.data.DataLoader(
        dataset=dataset, batch_sampler=eval_bal_sampler,
        collate_fn=eval_collector, num_workers=num_workers, pin_memory=True)
    eval_test_loader = torch.utils.data.DataLoader(
        dataset=dataset, batch_sampler=eval_test_sampler,
        collate_fn=eval_collector, num_workers=num_workers, pin_memory=True)

    # Evaluators (hold a reference to the unwrapped model; parameters are
    # shared with the DataParallel wrapper created below).
    bal_evaluator = Evaluator(model=model, generator=eval_bal_loader)
    test_evaluator = Evaluator(model=model, generator=eval_test_loader)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                           betas=(0.9, 0.999), eps=1e-08, weight_decay=0.,
                           amsgrad=True)

    train_bgn_time = time.time()

    # Resume training (dead branch while resume_iteration is fixed at 0,
    # kept for when it becomes configurable again).
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(
            workspace_input, 'checkpoints', filename,
            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
            .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
            'data_type={}'.format(data_type), model_type,
            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
            'augmentation={}'.format(augmentation),
            'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        if torch.cuda.is_available():
            checkpoint = torch.load(resume_checkpoint_path)
        else:
            checkpoint = torch.load(resume_checkpoint_path,
                                    map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']
    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)
    if device == 'cuda':
        model.to(device)

    time1 = time.time()

    for batch_data_dict in train_loader:
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],),
            'waveform': (batch_size [*2 if mixup], clip_samples),
            'target': (batch_size [*2 if mixup], classes_num),
            (ifexist) 'mixup_lambda': (batch_size * 2,)}
        """
        # Evaluate every 2000 iterations (and once at iteration 0).
        if (iteration % 2000 == 0 and iteration > resume_iteration) or \
                (iteration == 0):
            train_fin_time = time.time()

            bal_statistics = bal_evaluator.evaluate()
            test_statistics = test_evaluator.evaluate()

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))
            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            statistics_container.append(iteration, bal_statistics,
                                        data_type='bal')
            statistics_container.append(iteration, test_statistics,
                                        data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time
            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))
            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model every 20000 iterations.
        if iteration % 20000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'sampler': train_sampler.state_dict()}
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                                                       device)

        # Forward
        model.train()
        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                                      batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""
            batch_target_dict = {
                'target': do_mixup(batch_data_dict['target'],
                                   batch_data_dict['mixup_lambda'])}
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""
            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # Loss & backward (zero the gradients after stepping).
        loss = loss_func(batch_output_dict, batch_target_dict)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if iteration % 10 == 0:
            print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'
                  .format(iteration, time.time() - time1))
            time1 = time.time()

        iteration += 1

        # Stop learning
        if iteration == early_stop:
            break
def inference_validation(args):
    '''Inference and calculate metrics on validation data.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: '1', which means using validation data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths. The 'minidata_' prefix keeps debug artifacts separate from
    # full-data artifacts.
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'validate.h5')

    # Scalar (feature normalization statistics) computed from the training
    # split; hence the 'train.h5' filename under 'scalars'.
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'train.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'submission.csv')
    create_folder(os.path.dirname(submission_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model at the requested training iteration.
    Model = eval(model_type)
    model = Model(classes_num)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        train_hdf5_path=train_hdf5_path,
        validate_hdf5_path=validate_hdf5_path,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator (verbose=True prints per-class metrics)
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        taxonomy_level=taxonomy_level,
        cuda=cuda,
        verbose=True)

    # Evaluate on validation data and write the submission CSV.
    evaluator.evaluate(
        data_type='validate',
        submission_path=submission_path,
        annotation_path=annotation_path,
        yaml_path=yaml_path,
        max_iteration=None)

    # Visualize
    if visualize:
        evaluator.visualize(data_type='validate')
def inference_validation(args):
    '''Inference validation data.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates calculating
          metrics on all 1, 2, 3 and 4 folds.
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      visualize: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    visualize = args.visualize
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths. The 'minidata_' prefix keeps debug artifacts separate from
    # full-data artifacts.
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    submissions_dir = os.path.join(
        workspace, 'submissions', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
            audio_type, 'dev', frames_per_second, mel_bins),
        'iteration={}'.format(iteration))
    create_folder(submissions_dir)

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
            audio_type, 'dev', frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Inference and calculate metrics for a fold
    if holdout_fold != -1:
        features_dir = os.path.join(
            workspace, 'features',
            '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type,
                'dev', frames_per_second, mel_bins))

        scalar_path = os.path.join(
            workspace, 'scalars',
            '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type,
                'dev', frames_per_second, mel_bins), 'scalar.h5')

        # Bug fix: the original passed '' instead of `prefix` here, so with
        # --mini_data the checkpoint saved by train() (which uses `prefix`
        # in models_dir) could never be found.
        checkpoint_path = os.path.join(
            workspace, 'models', filename,
            '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name,
                prefix, audio_type, 'dev', frames_per_second, mel_bins),
            'holdout_fold={}'.format(holdout_fold),
            'md_{}_iters.pth'.format(iteration))

        # Load scalar
        scalar = load_scalar(scalar_path)

        # Load model (the checkpoint stores the whole pickled model object).
        checkpoint = torch.load(checkpoint_path)
        model = checkpoint['model']

        if cuda:
            model.cuda()

        # Data generator
        data_generator = DataGenerator(
            features_dir=features_dir,
            scalar=scalar,
            batch_size=batch_size,
            holdout_fold=holdout_fold)

        # Evaluator
        evaluator = Evaluator(
            model=model,
            data_generator=data_generator,
            cuda=cuda)

        # Calculate metrics
        data_type = 'validate'
        list_dict = evaluator.evaluate(data_type=data_type)
        evaluator.metrics(list_dict=list_dict,
                          submissions_dir=submissions_dir,
                          metadata_dir=metadata_dir)

        # Visualize reference and predicted events, elevation and azimuth
        if visualize:
            evaluator.visualize(data_type=data_type)

    # Calculate metrics for all folds
    else:
        prediction_names = os.listdir(submissions_dir)
        prediction_paths = [os.path.join(submissions_dir, name) for \
            name in prediction_names]

        metrics = calculate_metrics(metadata_dir=metadata_dir,
                                    prediction_paths=prediction_paths)

        logging.info('Metrics of {} files: '.format(len(prediction_names)))
        for key in metrics.keys():
            logging.info('    {:<20} {:.3f}'.format(key + ' :', metrics[key]))
def inference_evaluation(args):
    '''Inference on evaluation data.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: 'none', which means using model trained on all
          development data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths. The 'minidata_' prefix keeps debug artifacts separate from
    # full-data artifacts.
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    evaluate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'evaluate.h5')

    # Scalar (feature normalization statistics) computed from the training
    # split; hence the 'train.h5' filename under 'scalars'.
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'train.h5')

    # NOTE(review): loads a fixed 'best2.pth' checkpoint rather than the
    # '{iteration}_iterations.pth' naming used elsewhere — presumably a
    # hand-picked best model; `iteration` is parsed but unused here.
    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, 'best2.pth')

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'best2_submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
            mel_bins), 'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    model = Model(classes_num)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator (test-time generator: no targets available).
    data_generator = TestDataGenerator(
        hdf5_path=evaluate_hdf5_path,
        scalar=scalar,
        batch_size=batch_size)

    # Forward
    output_dict = forward(
        model=model,
        generate_func=data_generator.generate(),
        cuda=cuda,
        return_target=False)

    # Write submission
    write_submission_csv(
        audio_names=output_dict['audio_name'],
        outputs=output_dict['output'],
        taxonomy_level=taxonomy_level,
        submission_path=submission_path)
def train(args):
    '''Train. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      audio_type: 'foa' | 'mic'
      holdout_fold: 1 | 2 | 3 | 4 | -1, where -1 indicates using all data
          without validation for training
      model_name: string, e.g. 'Cnn_9layers'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    audio_type = args.audio_type
    holdout_fold = args.holdout_fold
    model_name = args.model_name
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    max_validate_num = 10   # Number of audio recordings to validate
    reduce_lr = True    # Reduce learning rate after several iterations

    # Paths. The 'minidata_' prefix keeps debug artifacts separate from
    # full-data artifacts.
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    metadata_dir = os.path.join(dataset_dir, 'metadata_dev')

    features_dir = os.path.join(
        workspace, 'features',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type,
            'dev', frames_per_second, mel_bins))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}{}_{}_logmel_{}frames_{}melbins'.format(prefix, audio_type,
            'dev', frames_per_second, mel_bins), 'scalar.h5')

    models_dir = os.path.join(
        workspace, 'models', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
            audio_type, 'dev', frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_folder(models_dir)

    temp_submissions_dir = os.path.join(
        workspace, '_temp', 'submissions', filename,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
            audio_type, 'dev', frames_per_second, mel_bins))
    create_folder(temp_submissions_dir)

    logs_dir = os.path.join(
        args.workspace, 'logs', filename, args.mode,
        '{}_{}{}_{}_logmel_{}frames_{}melbins'.format(model_name, prefix,
            audio_type, 'dev', frames_per_second, mel_bins),
        'holdout_fold={}'.format(holdout_fold))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_name)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0.)

    # Data generator
    data_generator = DataGenerator(
        features_dir=features_dir,
        scalar=scalar,
        batch_size=batch_size,
        holdout_fold=holdout_fold)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        max_validate_num=max_validate_num,
        cuda=cuda)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 100 iterations (including iteration 0).
        if iteration % 100 == 0:
            logging.info('------------------------------------')
            logging.info('iteration: {}'.format(iteration))

            train_fin_time = time.time()

            train_list_dict = evaluator.evaluate(data_type='train')
            evaluator.metrics(train_list_dict, temp_submissions_dir,
                metadata_dir)

            # NOTE(review): integer comparison — if holdout_fold arrives as
            # a string from argparse this branch always runs; confirm the
            # argument's type at the parser.
            if holdout_fold != -1:
                validate_list_dict = evaluator.evaluate(data_type='validate')
                evaluator.metrics(validate_list_dict, temp_submissions_dir,
                    metadata_dir)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time
            logging.info('train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 1000 iterations.
        # NOTE(review): pickles the whole model and optimizer objects
        # (not state_dicts), which ties checkpoints to the exact class code.
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model,
                'optimizer': optimizer}

            save_path = os.path.join(
                models_dir, 'md_{}_iters.pth'.format(iteration))
            torch.save(checkpoint, save_path)
            logging.info('Model saved to {}'.format(save_path))

        # Reduce learning rate by 10% every 200 iterations.
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key],
                cuda)

        # Train
        model.train()
        batch_output_dict = model(batch_data_dict['feature'])
        loss = event_spatial_loss(batch_output_dict, batch_data_dict)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning after a fixed number of iterations.
        if iteration == 10000:
            break

        iteration += 1
def train(args):
    """Train AudioSet tagging model. 

    Args:
      dataset_dir: str
      workspace: str
      data_type: 'balanced_train' | 'full_train'
      window_size: int
      hop_size: int
      mel_bins: int
      model_type: str
      loss_type: 'clip_bce'
      balanced: 'none' | 'balanced' | 'alternate'
      augmentation: 'none' | 'mixup'
      batch_size: int
      learning_rate: float
      resume_iteration: int
      early_stop: int
      accumulation_steps: int
      cuda: bool
    """

    # Arguments & parameters
    workspace = args.workspace
    data_type = args.data_type
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available(
        ) else torch.device('cpu')
    filename = args.filename
    num_workers = 128
    prefetch_factor = 4
    # Remnants of a DistributedDataParallel experiment, kept disabled:
    #os.environ["MASTER_ADDR"] = "localhost"
    #os.environ["MASTER_PORT"] = "12355"
    #dist.init_process_group("nccl", rank=rank, world_size=args.world_size)
    clip_samples = config.clip_samples
    classes_num = config.classes_num
    loss_func = get_loss_func(loss_type)

    # Paths
    black_list_csv = None

    train_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        '{}.h5'.format(data_type))

    eval_bal_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        'balanced_train.h5')

    eval_test_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        'eval.h5')

    # Checkpoint dir is additionally timestamped so repeated runs with the
    # same configuration do not overwrite each other.
    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size),
        datetime.datetime.now().strftime("%d%m%Y_%H%M%S"))
    #if rank == 0:
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(
        workspace, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pkl')
    #if rank == 0:
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')
        device = 'cpu'

    # Model
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size,
        hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax,
        classes_num=classes_num)

    params_num = count_parameters(model)
    # flops_num = count_flops(model, clip_samples)
    logging.info('Parameters num: {}'.format(params_num))
    # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9))

    # Dataset will be used by DataLoader later. Dataset takes a meta as input
    # and return a waveform and a target.
    dataset = AudioSetDataset(sample_rate=sample_rate)

    # Train sampler
    if balanced == 'none':
        Sampler = TrainSampler
    elif balanced == 'balanced':
        Sampler = BalancedTrainSampler
    elif balanced == 'alternate':
        Sampler = AlternateTrainSampler

    # Mixup consumes two clips per training example, hence the doubled
    # sampling batch size.
    train_sampler = Sampler(
        indexes_hdf5_path=train_indexes_hdf5_path,
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size,
        black_list_csv=black_list_csv)

    # Evaluate sampler
    eval_bal_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_bal_indexes_hdf5_path,
        batch_size=2 * batch_size)

    eval_test_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_test_indexes_hdf5_path,
        batch_size=2 * batch_size)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=train_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True,
        prefetch_factor=prefetch_factor)

    eval_bal_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=eval_bal_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True,
        prefetch_factor=prefetch_factor)

    eval_test_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=eval_test_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True,
        prefetch_factor=prefetch_factor)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)

    # Evaluator
    evaluator = Evaluator(model=model)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
        betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)

    train_bgn_time = time.time()

    # Resume training
    # NOTE(review): this resume path omits the timestamp component that
    # checkpoints_dir appends, so resuming expects checkpoints saved under
    # the un-timestamped layout — verify before relying on resume.
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
            .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
            'data_type={}'.format(data_type), model_type,
            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
            'augmentation={}'.format(augmentation),
            'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']

    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)

    #model = model.cuda(rank)
    #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    #print([(s[0], s[1].is_cuda) for s in model.named_parameters()])

    time1 = time.time()
    # Best-so-far mAPs used to decide whether to save "best" checkpoints.
    # NOTE(review): prev_bal_map / prev_test_map are never updated after an
    # evaluation, so any mAP > 0 triggers a "best" save at every evaluation.
    prev_bal_map = 0.0
    prev_test_map = 0.0
    save_bal_model = 0
    save_test_model = 0

    for batch_data_dict in train_loader:
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],), 
            'waveform': (batch_size [*2 if mixup], clip_samples), 
            'target': (batch_size [*2 if mixup], classes_num), 
            (ifexist) 'mixup_lambda': (batch_size * 2,)}
        """
        #print(batch_data_dict)
        # Evaluate every 2000 iterations.
        # NOTE(review): `iteration == -1` can never be true — likely meant
        # `iteration == 0` (compare the sibling train() that evaluates at
        # iteration 0); as written there is no initial evaluation.
        if (iteration % 2000 == 0 and iteration > resume_iteration) or \
                (iteration == -1):
            train_fin_time = time.time()

            bal_statistics = evaluator.evaluate(eval_bal_loader)
            test_statistics = evaluator.evaluate(eval_test_loader)

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))

            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            save_bal_model = 1 if np.mean(
                bal_statistics['average_precision']) > prev_bal_map else 0
            save_test_model = 1 if np.mean(
                test_statistics['average_precision']) > prev_test_map else 0

            statistics_container.append(iteration, bal_statistics,
                data_type='bal')
            statistics_container.append(iteration, test_statistics,
                data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model every 100000 iterations (including iteration 0).
        if iteration % 100000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Save a "best on balanced eval" checkpoint.
        if save_bal_model:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations_bal.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))
            save_bal_model = 0

        # Save a "best on test eval" checkpoint.
        if save_test_model:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations_test.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))
            save_test_model = 0

        # Mixup lambda
        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                batch_size=len(batch_data_dict['waveform']))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key],
                device)

        # Forward
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': do_mixup(batch_data_dict['target'],
                batch_data_dict['mixup_lambda'])}
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # Loss
        loss = loss_func(batch_output_dict, batch_target_dict)

        # Backward
        loss.backward()
        # NOTE(review): debug leftover — prints the loss tensor every step.
        print(loss)

        optimizer.step()
        optimizer.zero_grad()

        if iteration % 10 == 0:
            print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\
                .format(iteration, time.time() - time1))
            time1 = time.time()

        # Stop learning
        if iteration == early_stop:
            break

        iteration += 1
def pack_waveforms_to_hdf5(args):
    """Pack waveforms and targets of several audio clips into a single hdf5
    file. This can speed up loading and training.

    Args:
        args: argparse namespace with attributes:
            audios_dir: str, directory containing the audio clips
            csv_path: str, metadata csv listing clip names and labels
            waveforms_hdf5_path: str, output hdf5 path
            mini_data: bool, if True only pack the first 10 clips (debugging)
    """
    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveforms_hdf5_path = args.waveforms_hdf5_path
    mini_data = args.mini_data

    clip_samples = config.clip_samples
    classes_num = config.classes_num
    sample_rate = config.sample_rate
    id_to_ix = config.id_to_ix

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveforms_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveforms_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(
        prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path, classes_num, id_to_ix)

    if mini_data:
        # Keep only the first few clips for quick debugging runs
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0:mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveforms_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=((audios_num, )), dtype='S20')
        hf.create_dataset('waveform',
                          shape=((audios_num, clip_samples)),
                          dtype=np.int16)
        # Fix: the `np.bool` alias was deprecated in NumPy 1.20 and removed
        # in 1.24; `np.bool_` is the actual boolean scalar type and produces
        # the same hdf5 dtype.
        hf.create_dataset('target',
                          shape=((audios_num, classes_num)),
                          dtype=np.bool_)
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Pack waveform & target of several audio clips to a single hdf5 file
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path,
                                               sr=sample_rate,
                                               mono=True)
                # Force every clip to exactly clip_samples samples
                audio = pad_or_truncate(audio, clip_samples)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                # Stored as int16 to halve disk usage vs float32
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(
                    n, audio_path))

    logging.info('Write to {}'.format(waveforms_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
def train(args):
    """Train an AudioSet tagging model (Keras model fed by PyTorch data
    loaders); evaluates and checkpoints periodically.

    Args:
        args: argparse namespace providing workspace, data_type
            ('balanced_train' | 'full_train'), window_size, hop_size,
            mel_bins, fmin, fmax, model_type, loss_type (must be
            'clip_bce'), balanced, augmentation, batch_size,
            learning_rate, resume_iteration, early_stop, cuda, filename.
    """
    # Arguments & parameters
    # dataset_dir = args.dataset_dir
    workspace = args.workspace
    data_type = args.data_type
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available(
    ) else torch.device('cpu')
    filename = args.filename

    num_workers = 0  # single-process data loading

    sample_rate = config.sample_rate
    audio_length = config.audio_length
    classes_num = config.classes_num

    # Only clip-level binary cross-entropy is supported by this trainer
    assert loss_type == 'clip_bce'

    # Paths
    black_list_csv = os.path.join(workspace, 'black_list',
                                  'dcase2017task4.csv')

    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'waveforms')

    # Target hdf5 path
    eval_train_targets_hdf5_path = os.path.join(workspace, 'hdf5s', 'targets',
                                                'balanced_train.h5')

    eval_test_targets_hdf5_path = os.path.join(workspace, 'hdf5s', 'targets',
                                               'eval.h5')

    if data_type == 'balanced_train':
        train_targets_hdf5_path = os.path.join(workspace, 'hdf5s', 'targets',
                                               'balanced_train.h5')
    elif data_type == 'full_train':
        train_targets_hdf5_path = os.path.join(workspace, 'hdf5s', 'targets',
                                               'full_train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(
        workspace, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))

    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Collapse the torch.device back to a plain string for downstream use
    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU.')
        device = 'cpu'

    # Model
    # NOTE(review): Cnn13 is hard-coded; the model_type argument only affects
    # paths, not the architecture — confirm this is intended.
    model = Cnn13(audio_length, sample_rate, window_size, hop_size, mel_bins,
                  fmin, fmax, classes_num)

    model.summary()
    logging.info('Parameters number: {}'.format(model.count_params()))

    # Optimizer
    optimizer = keras.optimizers.Adam(lr=learning_rate,
                                      beta_1=0.9,
                                      beta_2=0.999,
                                      amsgrad=True)

    # Loss
    loss = keras.losses.binary_crossentropy

    model.compile(loss=loss, optimizer=optimizer)

    # Dataset will be used by DataLoader later. Provide an index and return
    # waveform and target of audio
    train_dataset = AudioSetDataset(target_hdf5_path=train_targets_hdf5_path,
                                    waveform_hdf5s_dir=waveform_hdf5s_dir,
                                    audio_length=audio_length,
                                    classes_num=classes_num)

    bal_dataset = AudioSetDataset(
        target_hdf5_path=eval_train_targets_hdf5_path,
        waveform_hdf5s_dir=waveform_hdf5s_dir,
        audio_length=audio_length,
        classes_num=classes_num)

    test_dataset = AudioSetDataset(
        target_hdf5_path=eval_test_targets_hdf5_path,
        waveform_hdf5s_dir=waveform_hdf5s_dir,
        audio_length=audio_length,
        classes_num=classes_num)

    # Sampler
    # NOTE(review): train_sampler/train_collector are only bound when
    # balanced == 'balanced'; other values would raise NameError below.
    if balanced == 'balanced':
        if 'mixup' in augmentation:
            train_sampler = BalancedSamplerMixup(
                target_hdf5_path=train_targets_hdf5_path,
                black_list_csv=black_list_csv,
                batch_size=batch_size,
                start_mix_epoch=1)
            train_collector = Collator(mixup_alpha=1.)
            assert batch_size % torch.cuda.device_count(
            ) == 0, 'To let mixup working properly this must be satisfied.'
        else:
            train_sampler = BalancedSampler(
                target_hdf5_path=train_targets_hdf5_path,
                black_list_csv=black_list_csv,
                batch_size=batch_size)
            train_collector = Collator(mixup_alpha=None)

    bal_sampler = EvaluateSampler(dataset_size=len(bal_dataset),
                                  batch_size=batch_size)

    test_sampler = EvaluateSampler(dataset_size=len(test_dataset),
                                   batch_size=batch_size)

    eval_collector = Collator(mixup_alpha=None)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_sampler=train_sampler,
                                               collate_fn=train_collector,
                                               num_workers=num_workers,
                                               pin_memory=True)

    bal_loader = torch.utils.data.DataLoader(dataset=bal_dataset,
                                             batch_sampler=bal_sampler,
                                             collate_fn=eval_collector,
                                             num_workers=num_workers,
                                             pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_sampler=test_sampler,
                                              collate_fn=eval_collector,
                                              num_workers=num_workers,
                                              pin_memory=True)

    # Evaluator
    bal_evaluator = Evaluator(model=model, generator=bal_loader)
    test_evaluator = Evaluator(model=model, generator=test_loader)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    train_bgn_time = time.time()

    # Resume training: restore weights, sampler position and statistics
    if resume_iteration > 0:
        resume_weights_path = os.path.join(
            checkpoints_dir,
            '{}_iterations.weights.h5'.format(resume_iteration))
        resume_sampler_path = os.path.join(
            checkpoints_dir,
            '{}_iterations.sampler.h5'.format(resume_iteration))
        iteration = resume_iteration
        model.load_weights(resume_weights_path)
        sampler_state_dict = cPickle.load(open(resume_sampler_path, 'rb'))
        train_sampler.load_state_dict(sampler_state_dict)
        statistics_container.load_state_dict(resume_iteration)
    else:
        iteration = 0

    t_ = time.time()  # NOTE(review): unused — presumably debug leftover

    for batch_data_dict in train_loader:
        # Evaluate on both held-out sets every 2000 iterations (and at start)
        if (iteration % 2000 == 0
                and iteration > resume_iteration) or (iteration == 0):
            train_fin_time = time.time()

            bal_statistics = bal_evaluator.evaluate()
            test_statistics = test_evaluator.evaluate()

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))
            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            statistics_container.append(iteration,
                                        bal_statistics,
                                        data_type='bal')
            statistics_container.append(iteration,
                                        test_statistics,
                                        data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model
        # if iteration % 20000 == 0 and iteration > resume_iteration:
        # NOTE(review): saving only at iteration 10 looks like a debug
        # override of the commented-out condition above — confirm before use.
        if iteration == 10:
            weights_path = os.path.join(
                checkpoints_dir, '{}_iterations.weights.h5'.format(iteration))
            sampler_path = os.path.join(
                checkpoints_dir, '{}_iterations.sampler.h5'.format(iteration))

            model.save_weights(weights_path)
            cPickle.dump(train_sampler.state_dict(), open(sampler_path, 'wb'))
            logging.info('Model weights saved to {}'.format(weights_path))
            logging.info('Sampler saved to {}'.format(sampler_path))
        '''
        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                batch_data_dict['mixup_lambda'])
            batch_target_dict = {'target': do_mixup(batch_data_dict['target'],
                batch_data_dict['mixup_lambda'])}
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            batch_target_dict = {'target': batch_data_dict['target']}
        '''
        # One gradient step on the collated batch
        loss = model.train_on_batch(x=batch_data_dict['waveform'],
                                    y=batch_data_dict['target'])
        print(iteration, loss)

        iteration += 1

        # Stop learning
        if iteration == early_stop:
            break
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        train_source: 'curated' | 'noisy' | 'curated_and_noisy'
        segment_seconds: float, duration of audio recordings to be padded
            or split
        hop_seconds: float, hop seconds between segments
        pad_type: 'constant' | 'repeat'
        holdout_fold: '1', '2', '3', '4' | 'none', set `none` for training
            on all data without validation
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        n_epoch: int, number of epochs to train
        batch_size: int
        valid_source: 'curated' | 'noisy' | 'both'
        pretrained: path to a pretrained checkpoint, or 'none'
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    n_epoch = args.n_epoch
    batch_size = args.batch_size
    valid_source = args.valid_source
    pretrained = args.pretrained
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second
    # Number of mini-batches to evaluate on training data
    # NOTE(review): unused below — evaluate() is called with
    # max_iteration=None; confirm before relying on it.
    max_iteration = 500
    reduce_lr = False

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    # NOTE(review): scalar is loaded from the noisy-set statistics even when
    # training on curated data — presumably intentional; confirm.
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    # Fresh runs and resumed-from-pretrained runs write to separate
    # directory trees ('resume' suffix) so checkpoints don't collide.
    if pretrained == 'none':
        checkpoints_dir = os.path.join(
            workspace, 'checkpoints', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type)
        create_folder(checkpoints_dir)

        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type,
            'validate_statistics.pickle')
        create_folder(os.path.dirname(validate_statistics_path))

        logs_dir = os.path.join(
            workspace, 'logs', filename, args.mode,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type)
        create_logging(logs_dir, 'w')
    else:
        checkpoints_dir = os.path.join(
            workspace, 'checkpoints', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume')
        create_folder(checkpoints_dir)

        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume',
            'validate_statistics.pickle')
        create_folder(os.path.dirname(validate_statistics_path))

        logs_dir = os.path.join(
            workspace, 'logs', filename, args.mode,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'.format(segment_seconds,
                                                     hop_seconds, pad_type),
            'holdout_fold={}'.format(holdout_fold), model_type, 'resume')
        create_logging(logs_dir, 'w')

    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    # The model emits classes_num * 2 outputs (presumably class logits plus
    # an auxiliary head — TODO confirm against the model definition)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    if pretrained != 'none':
        model.load_state_dict(torch.load(pretrained)['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Calculate total iteration required for n_epoch
    iter_per_epoch = np.ceil(
        len(data_generator.train_segments_indexes) / batch_size).astype(int)
    total_iter = iter_per_epoch * n_epoch

    # Define Warm-up LR scheduler: linear ramp from min_lr to max_lr over
    # the first epoch_to_warm epochs, then cosine-with-restarts until
    # epoch_to_flat, then a flat 1e-5.
    epoch_to_warm = 10
    epoch_to_flat = 200

    def _warmup_lr(optimizer,
                   iteration,
                   iter_per_epoch,
                   epoch_to_warm,
                   min_lr=0,
                   max_lr=0.0035):
        # Linear interpolation between min_lr and max_lr by iteration count
        delta = (max_lr - min_lr) / iter_per_epoch / epoch_to_warm
        lr = min_lr + delta * iteration
        for p in optimizer.param_groups:
            p['lr'] = lr
        return lr

    # Optimizer
    criterion = FocalLoss(2)
    # metric_loss = RingLoss(type='auto', loss_weight=1.0)
    metric_loss = ArcFaceLoss()
    if cuda:
        metric_loss.cuda()
    optimizer = Nadam(model.parameters(),
                      lr=0.0035,
                      betas=(0.9, 0.999),
                      eps=1e-8,
                      weight_decay=0,
                      schedule_decay=4e-3)
    scheduler = CosineLRWithRestarts(
        optimizer,
        batch_size,
        len(data_generator.train_segments_indexes),
        restart_period=epoch_to_flat - epoch_to_warm + 1,
        t_mult=1,
        verbose=True)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Valid source: which subsets to evaluate on
    if valid_source == 'curated':
        target_sources = ['curated']
    elif valid_source == 'noisy':
        target_sources = ['noisy']
    elif valid_source == 'both':
        target_sources = ['curated', 'noisy']

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    epoch = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 2500 iterations
        if iteration % 2500 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # Evaluation on a slice of the training data is disabled here;
            # re-enable by calling evaluator.evaluate(data_type='train', ...)

            # Evaluate on holdout validation data
            if holdout_fold != 'none':
                logging.info('Validate statistics:')
                for target_source in target_sources:
                    validate_curated_statistics = evaluator.evaluate(
                        data_type='validate',
                        target_source=target_source,
                        max_iteration=None,
                        verbose=False)
                    validate_statistics_container.append(
                        iteration, target_source, validate_curated_statistics)
                validate_statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 2500 iterations (skipping iteration 0)
        if iteration % 2500 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate (disabled: reduce_lr is False above)
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'mask', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_feature, batch_output = model(batch_data_dict['feature'],
                                            is_infer=False)

        # loss: focal classification loss plus metric-learning loss on the
        # embedding
        loss = criterion(batch_output,
                         batch_data_dict['target']) + metric_loss(
                             batch_feature, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()

        # LR Warm up during the first epoch_to_warm epochs
        if iteration < epoch_to_warm * iter_per_epoch:
            cur_lr = _warmup_lr(optimizer,
                                iteration,
                                iter_per_epoch,
                                epoch_to_warm=epoch_to_warm,
                                min_lr=0,
                                max_lr=0.0035)

        loss.backward()
        optimizer.step()

        # Cosine schedule is active only between warm-up and the flat phase;
        # scheduler.step() advances an epoch when the generator wraps around
        if iteration >= epoch_to_warm * iter_per_epoch and \
                iteration < epoch_to_flat * iter_per_epoch:
            if data_generator.pointer >= len(
                    data_generator.train_segments_indexes):
                scheduler.step()
            scheduler.batch_step()

        # Show LR information every 10 epochs
        if iteration % iter_per_epoch == 0 and iteration != 0:
            epoch += 1
            if epoch % 10 == 0:
                for p in optimizer.param_groups:
                    logging.info(
                        'Learning rate at epoch {:3d} / iteration {:5d} is: {:.6f}'
                        .format(epoch, iteration, p['lr']))

        # Stop learning
        if iteration == total_iter:
            break

        iteration += 1

        # Kick the scheduler once when warm-up ends; pin LR when the
        # cosine phase ends
        if iteration == epoch_to_warm * iter_per_epoch:
            scheduler.step()
        if iteration == epoch_to_flat * iter_per_epoch:
            for param_group in optimizer.param_groups:
                param_group['lr'] = 1e-5
def download_wavs(args): """Download videos and extract audio in wav format. """ # Paths csv_path = args.csv_path audios_dir = args.audios_dir mini_data = args.mini_data if mini_data: logs_dir = '_logs/download_dataset/{}'.format(get_filename(csv_path)) else: logs_dir = '_logs/download_dataset_minidata/{}'.format( get_filename(csv_path)) create_folder(audios_dir) create_folder(logs_dir) create_logging(logs_dir, filemode='w') logging.info('Download log is saved to {}'.format(logs_dir)) # Read csv with open(csv_path, 'r') as f: lines = f.readlines() lines = lines[3:] # Remove csv head info if mini_data: lines = lines[0:10] # Download small data for debug download_time = time.time() # Download for (n, line) in enumerate(lines): items = line.split(', ') audio_id = items[0] start_time = float(items[1]) end_time = float(items[2]) duration = end_time - start_time logging.info('{} {} start_time: {:.1f}, end_time: {:.1f}'.format( n, audio_id, start_time, end_time)) # Download full video of whatever format video_name = os.path.join(audios_dir, '_Y{}.%(ext)s'.format(audio_id)) os.system("youtube-dl --quiet -o '{}' -x https://www.youtube.com/watch?v={}"\ .format(video_name, audio_id)) video_paths = glob.glob( os.path.join(audios_dir, '_Y' + audio_id + '.*')) # If download successful if len(video_paths) > 0: video_path = video_paths[0] # Choose one video # Add 'Y' to the head because some video ids are started with '-' # which will cause problem audio_path = os.path.join(audios_dir, 'Y' + audio_id + '.wav') # Extract audio in wav format os.system("ffmpeg -loglevel panic -i {} -ac 1 -ar 32000 -ss {} -t 00:00:{} {} "\ .format(video_path, str(datetime.timedelta(seconds=start_time)), duration, audio_path)) # Remove downloaded video os.system("rm {}".format(video_path)) logging.info("Download and convert to {}".format(audio_path)) logging.info( 'Download finished! Time spent: {:.3f} s'.format(time.time() - download_time)) logging.info('Logs can be viewed in {}'.format(logs_dir))
def inference_validation(args):
    '''Inference and calculate metrics on validation data.

    Args:
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        train_source: 'curated' | 'noisy' | 'curated_and_noisy'
        segment_seconds: float, duration of audio recordings to be padded
            or split
        hop_seconds: float, hop seconds between segments
        pad_type: 'constant' | 'repeat'
        holdout_fold: '1', '2', '3', '4'
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        iteration: int, load model of this iteration
        batch_size: int
        resume: bool, load from the 'resume' checkpoint tree if True
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        visualize: bool, visualize the logmel spectrogram of segments
    '''
    # Arguments & parameters
    dataset_dir = DATASET_DIR
    workspace = WORKSPACE
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    resume = args.resume
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    curated_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_curated.h5')

    noisy_feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    curated_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_curated_cross_validation.csv')

    noisy_cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        'train_noisy_cross_validation.csv')

    # NOTE(review): scalar comes from the noisy-set statistics regardless of
    # train_source — matches the train() counterpart; confirm intended.
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train_noisy.h5')

    # Checkpoint lives in the 'resume' subtree when the model was trained
    # from a pretrained checkpoint. NOTE(review): checkpoint paths do not
    # include the mini-data prefix — confirm that is intended.
    if not resume:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'
            ''.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'
            ''.format(holdout_fold), model_type,
            '{}_iterations.pth'.format(iteration))
    else:
        checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
            'train_source={}'.format(train_source),
            'segment={}s,hop={}s,pad_type={}'
            ''.format(segment_seconds, hop_seconds, pad_type),
            'holdout_fold={}'
            ''.format(holdout_fold), model_type, 'resume',
            '{}_iterations.pth'.format(iteration))

    figs_dir = os.path.join(workspace, 'figures')
    create_folder(figs_dir)

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'
        ''.format(segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model: same construction as in train() so the checkpoint fits
    Model = eval(model_type)
    if model_type == 'cbam_ResNet18':
        model = Model(18, classes_num * 2, 'CBAM')
    else:
        model = Model(classes_num * 2)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        curated_feature_hdf5_path=curated_feature_hdf5_path,
        noisy_feature_hdf5_path=noisy_feature_hdf5_path,
        curated_cross_validation_path=curated_cross_validation_path,
        noisy_cross_validation_path=noisy_cross_validation_path,
        train_source=train_source,
        holdout_fold=holdout_fold,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Evaluate on both validation subsets
    for target_source in ['curated', 'noisy']:
        validate_curated_statistics = evaluator.evaluate(
            data_type='validate',
            target_source=target_source,
            max_iteration=None,
            verbose=True)

        # Visualize: dump the logmel spectrograms of evaluated segments
        if visualize:
            save_fig_path = os.path.join(
                figs_dir, '{}_logmel.png'.format(target_source))
            validate_curated_statistics = evaluator.visualize(
                data_type='validate',
                target_source=target_source,
                save_fig_path=save_fig_path,
                max_iteration=None,
                verbose=False)
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019
            Task1
        data_type: 'development' | 'evaluation'
        holdout_fold: '1' | 'none', set 1 for development and none for
            training on all data without validation
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        fixed: 'True' | 'False' (string), freeze pretrained backbone params
        finetune: 'True' | 'False' (string), warm-start from a checkpoint
        ite_train / ite_eva / ite_store: int iteration thresholds for
            stopping, evaluating and checkpointing
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    fixed = args.fixed
    finetune = args.finetune
    ite_train = args.ite_train
    ite_eva = args.ite_eva
    ite_store = args.ite_store

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    # Number of mini-batches to evaluate on training data
    # NOTE(review): unused below — evaluate() is called with
    # max_iteration=None.
    max_iteration = None
    reduce_lr = True

    sources_to_evaluate = get_sources(subtask)
    # Last label is out-of-domain, hence the -1
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
                                'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold),
        model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold),
        model_type, 'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold),
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)

    # Subtasks a/b are single-label (log-softmax + NLL); subtask c is
    # multi-label (sigmoid + BCE)
    if subtask in ['a', 'b']:
        if fixed == 'True':
            model = Model(in_domain_classes_num,
                          activation='logsoftmax',
                          fixed=True)
        else:
            model = Model(in_domain_classes_num,
                          activation='logsoftmax',
                          fixed=False)
        loss_func = nll_loss
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()

    # Optimizer: when fixed, only optimize parameters left trainable
    if fixed == 'True':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=1e-3,
                               betas=(0.9, 0.999),
                               eps=1e-08,
                               weight_decay=0.,
                               amsgrad=True)
    else:
        optimizer = optim.Adam(model.parameters(),
                               lr=1e-3,
                               betas=(0.9, 0.999),
                               eps=1e-08,
                               weight_decay=0.,
                               amsgrad=True)

    if finetune == 'True':
        # NOTE(review): hard-coded absolute checkpoint path tied to one
        # machine/user; alternative checkpoints for Logmel_Res38/Cnn14/
        # Cnn10/MobileNetV1/V2/Wavegram_Cnn14 were listed here commented
        # out — consider making this a CLI argument.
        model_path = '/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/' + model_type + '/2000_iterations.pth'
        device = torch.device('cuda')
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model'])

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        subtask=subtask,
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 200 iterations once past the ite_eva threshold
        # (ite_eva is typically 1800)
        if iteration % 200 == 0 and iteration > ite_eva:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                train_statistics = evaluator.evaluate(
                    data_type='train',
                    source=source,
                    max_iteration=None,
                    verbose=False)

            if holdout_fold != 'none':
                for source in sources_to_evaluate:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate',
                        source=source,
                        max_iteration=None,
                        verbose=False)
                    validate_statistics_container.append_and_dump(
                        iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 200 iterations once past the ite_store threshold
        if iteration % 200 == 0 and iteration > ite_store:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate by 7% every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.93

        # Move data to GPU (all four feature streams plus target)
        for key in batch_data_dict.keys():
            if key in ['feature', 'feature_gamm', 'feature_mfcc',
                       'feature_panns', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train using Mixup on all four feature streams with one shared
        # lambda so the streams stay aligned
        model.train()
        mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(
            x1=batch_data_dict['feature'],
            x2=batch_data_dict['feature_gamm'],
            x3=batch_data_dict['feature_mfcc'],
            x4=batch_data_dict['feature_panns'],
            y=batch_data_dict['target'],
            alpha=0.2)
        batch_output, batch_loss = model(mixed_x1, mixed_x2, mixed_x3,
                                         mixed_x4)

        # Single-scale models emit (batch, 10) class scores; multi-scale
        # models emit one extra axis of per-scale outputs, each contributing
        # a mixup loss term (last entry excluded — presumably an ensemble
        # output; TODO confirm against the model definition)
        if batch_output.shape[1] == 10:  # single scale models
            loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam)
        else:  # multi scale models
            losses = []
            for ite in range(batch_output.shape[1] - 1):
                loss = mixup_criterion(loss_func, batch_output[:, ite, :],
                                       y_a, y_b, lam)
                losses.append(loss)
            loss = sum(losses)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning (ite_train is typically 12000 when training from
        # scratch)
        if iteration == ite_train:
            break

        iteration += 1
def train(args):
    """Train a piano transcription system on the MAESTRO dataset.

    Statistics on the train/validation/test splits are evaluated and dumped
    every 5000 iterations; model + sampler state is checkpointed every 20000
    iterations; the learning rate decays by 0.9 every `reduce_iteration` steps.

    Args:
      workspace: str, directory of your workspace
      model_type: str, e.g. 'Regressonset_regressoffset_frame_velocity_CRNN'
      loss_type: str, e.g. 'regress_onset_offset_frame_velocity_bce'
      augmentation: 'none' | 'aug'
      max_note_shift: int, semitone range for pitch-shift augmentation
      batch_size: int
      learning_rate: float
      reduce_iteration: int, interval for learning-rate decay
      resume_iteration: int, 0 starts from scratch
      early_stop: int, iteration at which training stops
      cuda: bool
      mini_data: bool, train on a small subset for debugging
    """

    # Arguments & parameters
    workspace = args.workspace
    model_type = args.model_type
    loss_type = args.loss_type
    augmentation = args.augmentation
    max_note_shift = args.max_note_shift
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    reduce_iteration = args.reduce_iteration
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')
    mini_data = args.mini_data
    filename = args.filename

    sample_rate = config.sample_rate
    segment_seconds = config.segment_seconds
    hop_seconds = config.hop_seconds
    segment_samples = int(segment_seconds * sample_rate)  # NOTE(review): computed but not used below
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    num_workers = 8

    # Loss function selected by name (project helper)
    loss_func = get_loss_func(loss_type)

    # Paths
    hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maestro')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
        model_type, 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'max_note_shift={}'.format(max_note_shift),
        'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(workspace, 'statistics', filename,
        model_type, 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'max_note_shift={}'.format(max_note_shift),
        'batch_size={}'.format(batch_size), 'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename, model_type,
        'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'max_note_shift={}'.format(max_note_shift),
        'batch_size={}'.format(batch_size))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Normalize device to a plain string for the move_data_to_device calls below
    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU.')
        device = 'cpu'

    # Model class resolved from its name.
    # NOTE(review): eval() on a CLI-supplied string — safe only for trusted input.
    Model = eval(model_type)
    model = Model(frames_per_second=frames_per_second, classes_num=classes_num)

    if augmentation == 'none':
        augmentor = None
    elif augmentation == 'aug':
        augmentor = Augmentor()
    else:
        raise Exception('Incorrect argumentation!')

    # Dataset: augmentation only applies to training; evaluation uses
    # max_note_shift=0 (no pitch shift).
    train_dataset = MaestroDataset(hdf5s_dir=hdf5s_dir,
        segment_seconds=segment_seconds, frames_per_second=frames_per_second,
        max_note_shift=max_note_shift, augmentor=augmentor)

    evaluate_dataset = MaestroDataset(hdf5s_dir=hdf5s_dir,
        segment_seconds=segment_seconds, frames_per_second=frames_per_second,
        max_note_shift=0)

    # Sampler for training
    train_sampler = Sampler(hdf5s_dir=hdf5s_dir, split='train',
        segment_seconds=segment_seconds, hop_seconds=hop_seconds,
        batch_size=batch_size, mini_data=mini_data)

    # Samplers for evaluation on each split
    evaluate_train_sampler = TestSampler(hdf5s_dir=hdf5s_dir, split='train',
        segment_seconds=segment_seconds, hop_seconds=hop_seconds,
        batch_size=batch_size, mini_data=mini_data)

    evaluate_validate_sampler = TestSampler(hdf5s_dir=hdf5s_dir,
        split='validation', segment_seconds=segment_seconds,
        hop_seconds=hop_seconds, batch_size=batch_size, mini_data=mini_data)

    evaluate_test_sampler = TestSampler(hdf5s_dir=hdf5s_dir, split='test',
        segment_seconds=segment_seconds, hop_seconds=hop_seconds,
        batch_size=batch_size, mini_data=mini_data)

    # Dataloader
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
        batch_sampler=train_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    evaluate_train_loader = torch.utils.data.DataLoader(
        dataset=evaluate_dataset, batch_sampler=evaluate_train_sampler,
        collate_fn=collate_fn, num_workers=num_workers, pin_memory=True)

    validate_loader = torch.utils.data.DataLoader(dataset=evaluate_dataset,
        batch_sampler=evaluate_validate_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=evaluate_dataset,
        batch_sampler=evaluate_test_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    # Evaluator
    evaluator = SegmentEvaluator(model, batch_size)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
        betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)

    # Resume training
    if resume_iteration > 0:
        # NOTE(review): this path omits the 'max_note_shift={}' folder that
        # checkpoints_dir (used for saving) includes — confirm resuming works
        # when max_note_shift != 0.
        resume_checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename, model_type,
            'loss_type={}'.format(loss_type),
            'augmentation={}'.format(augmentation),
            'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']
    else:
        iteration = 0

    # Parallel: wrap in DataParallel before moving to device
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)

    train_bgn_time = time.time()

    for batch_data_dict in train_loader:

        # Evaluation on all three splits every 5000 iterations
        if iteration % 5000 == 0:  # and iteration > 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            evaluate_train_statistics = evaluator.evaluate(evaluate_train_loader)
            validate_statistics = evaluator.evaluate(validate_loader)
            test_statistics = evaluator.evaluate(test_loader)

            logging.info(' Train statistics: {}'.format(evaluate_train_statistics))
            logging.info(' Validation statistics: {}'.format(validate_statistics))
            logging.info(' Test statistics: {}'.format(test_statistics))

            statistics_container.append(iteration, evaluate_train_statistics, data_type='train')
            statistics_container.append(iteration, validate_statistics, data_type='validation')
            statistics_container.append(iteration, test_statistics, data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 20000 iterations.
        # NOTE(review): optimizer state is not saved (and the resume branch
        # does not restore it), so a resumed run restarts Adam's moment
        # estimates — confirm this is intended.
        if iteration % 20000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate by 0.9 every reduce_iteration steps
        if iteration % reduce_iteration == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device)

        # Forward; the loss function consumes the model, its outputs and the
        # full batch dict (targets included).
        model.train()
        batch_output_dict = model(batch_data_dict['waveform'])

        loss = loss_func(model, batch_output_dict, batch_data_dict)
        print(iteration, loss)

        # Backward
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Stop learning
        if iteration == early_stop:
            break

        iteration += 1
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file.

    Loads the scalar and checkpoint produced by training on the full
    development set (holdout_fold='none'), runs the model over the
    leaderboard/evaluation features and writes a DCASE submission CSV.

    Args:
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int, which saved checkpoint to load
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data

    Raises:
      ValueError: if `subtask` is not one of 'a', 'b', 'c'.
    '''

    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    # Models trained on the full development data (no held-out fold) are used
    # for inference on evaluation data.
    holdout_fold = 'none'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    # presumably the last label is the out-of-domain class — TODO confirm
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')

    feature_hdf5_path = os.path.join(workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(sub_dir))

    # Scalar and checkpoint come from the *development* training run
    scalar_path = os.path.join(workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(trained_sub_dir))

    checkpoint_path = os.path.join(workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(trained_sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), sub_dir, 'holdout_fold={}'.format(holdout_fold),
        model_type, '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model.
    # NOTE(review): eval() on a CLI-supplied string — safe only for trusted input.
    Model = eval(model_type)

    # Subtasks a/b are single-label (logsoftmax); subtask c is multi-label
    # (sigmoid). The unused loss-function assignments from the original were
    # dropped: no loss is computed during inference.
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
    else:
        # Fix: previously an unknown subtask left `model` unbound and the
        # function crashed later with an opaque NameError; fail fast instead.
        raise ValueError('Invalid subtask: {}'.format(subtask))

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False,
        return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
def train(args):
    """Train AudioSet tagging model.

    Evaluates mAP on the balanced-train and eval splits every 2000 iterations,
    checkpoints every 20000 iterations, and stops at `early_stop` iterations.

    Args:
      workspace: str
      data_type: 'balanced_train' | 'unbalanced_train'
      window_size, hop_size, mel_bins, fmin, fmax: STFT / mel front-end params
      model_type: str
      loss_type: 'bce'
      balanced: bool, whether to use the class-balanced sampler
      augmentation: str, mixup is enabled when it contains 'mixup'
      batch_size: int
      learning_rate: float
      resume_iteration: int, 0 starts from scratch
      early_stop: int, iteration at which training stops
      cuda: bool
    """

    # Arguments & parameters
    workspace = args.workspace
    data_type = args.data_type
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')
    filename = args.filename

    num_workers = 8
    sample_rate = config.sample_rate
    clip_samples = config.clip_samples
    classes_num = config.classes_num
    loss_func = get_loss_func(loss_type)

    # Paths
    black_list_csv = os.path.join(workspace, 'black_list', 'dcase2017task4.csv')

    train_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        '{}.h5'.format(data_type))

    eval_bal_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        'balanced_train.h5')

    eval_test_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        'eval.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(
        workspace, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size), 'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
        .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Normalize device to a plain string for move_data_to_device below
    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU.')
        device = 'cpu'

    # Model class resolved from its name.
    # NOTE(review): eval() on a CLI-supplied string — safe only for trusted input.
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size,
        hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax,
        classes_num=classes_num)

    params_num = count_parameters(model)
    # flops_num = count_flops(model, clip_samples)
    logging.info('Parameters num: {}'.format(params_num))
    # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9))

    # Dataset will be used by DataLoader later. Dataset takes a meta as input
    # and return a waveform and a target.
    dataset = AudioSetDataset(clip_samples=clip_samples, classes_num=classes_num)

    # Train sampler (the collator handles mixup pairing when enabled)
    (train_sampler, train_collector) = get_train_sampler(balanced,
        augmentation, train_indexes_hdf5_path, black_list_csv, batch_size)

    # Evaluate sampler
    eval_bal_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_bal_indexes_hdf5_path, batch_size=batch_size)

    eval_test_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_test_indexes_hdf5_path, batch_size=batch_size)

    # No mixup during evaluation
    eval_collector = Collator(mixup_alpha=None)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=train_sampler, collate_fn=train_collector,
        num_workers=num_workers, pin_memory=True)

    eval_bal_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=eval_bal_sampler, collate_fn=eval_collector,
        num_workers=num_workers, pin_memory=True)

    eval_test_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=eval_test_sampler, collate_fn=eval_collector,
        num_workers=num_workers, pin_memory=True)

    # Evaluator
    bal_evaluator = Evaluator(model=model, generator=eval_bal_loader)
    test_evaluator = Evaluator(model=model, generator=eval_test_loader)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
        betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)

    train_bgn_time = time.time()

    # Resume training
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(
            workspace, 'checkpoints', filename,
            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
            .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
            'data_type={}'.format(data_type), model_type,
            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
            'augmentation={}'.format(augmentation),
            'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']
        # NOTE(review): the saved checkpoint also contains 'optimizer' state
        # (see the save block below) but it is not restored here — confirm
        # whether Adam state should be resumed.
    else:
        iteration = 0

    # Parallel: wrap in DataParallel before moving to device
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)

    time1 = time.time()

    for batch_data_dict in train_loader:
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],),
            'waveform': (batch_size [*2 if mixup], clip_samples),
            'target': (batch_size [*2 if mixup], classes_num),
            (ifexist) 'mixup_lambda': (batch_size * 2,)}
        """

        # Evaluate every 2000 iterations (and once at iteration 0)
        if (iteration % 2000 == 0 and iteration > resume_iteration) or (iteration == 0):
            train_fin_time = time.time()

            bal_statistics = bal_evaluator.evaluate()
            test_statistics = test_evaluator.evaluate()

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))
            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            statistics_container.append(iteration, bal_statistics, data_type='bal')
            statistics_container.append(iteration, test_statistics, data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))
            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model every 20000 iterations
        if iteration % 20000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'sampler': train_sampler.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device)

        # Forward
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            # Mix the targets with the same lambdas used on the waveforms
            batch_target_dict = {'target': do_mixup(batch_data_dict['target'],
                batch_data_dict['mixup_lambda'])}
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # Loss
        loss = loss_func(batch_output_dict, batch_target_dict)

        # Backward
        loss.backward()
        print(loss)

        optimizer.step()
        optimizer.zero_grad()

        if iteration % 10 == 0:
            print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'
                .format(iteration, time.time() - time1))
            time1 = time.time()

        iteration += 1

        # Stop learning
        if iteration == early_stop:
            break
def train(args):
    '''Training. Model will be saved after several iterations.

    Evaluates every 200 iterations, checkpoints every 1000 iterations, decays
    the learning rate by 0.9 every 200 iterations, and stops at 3000.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    seq_len = 640  # number of input frames fed to the model
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    # NOTE(review): unused — evaluate() below is called with max_iteration=None
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar (feature normalization statistics)
    scalar = load_scalar(scalar_path)

    # Model class resolved from its name.
    # NOTE(review): eval() on a CLI-supplied string — safe only for trusted input.
    Model = eval(model_type)
    model = Model(classes_num, seq_len, mel_bins, cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0., amsgrad=True)
    print('cliqueNet parameters:', sum(param.numel() for param in model.parameters()))

    # Data generator
    data_generator = DataGenerator(
        train_hdf5_path=train_hdf5_path,
        validate_hdf5_path=validate_hdf5_path,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        taxonomy_level=taxonomy_level,
        cuda=cuda,
        verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 200 iterations
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                    'the official evaluation tool!')

            # NOTE(review): result is unused; evaluate() presumably logs
            # internally — confirm before removing.
            train_statistics = evaluator.evaluate(data_type='train',
                max_iteration=None)

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 1000 iterations
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate by 0.9 every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU (only tensors the model/loss consume)
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss: target selected by taxonomy level ('fine_target'/'coarse_target')
        batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 3000:
            break

        iteration += 1
def train(args):
    """Train (or fine-tune a pretrained) audio tagging model on GTZAN.

    Evaluates validation accuracy every 200 iterations, checkpoints every
    2000 iterations, and stops at `stop_iteration`.

    Args:
      workspace: str, directory of workspace
      holdout_fold: which fold is held out for validation
      model_type: str, name of a model class
      pretrained_checkpoint_path: str or '' — when set, weights are loaded
          from this checkpoint before training
      freeze_base: bool, freeze the pretrained base network
      loss_type: str
      augmentation: str, mixup is enabled when it contains 'mixup'
      learning_rate: float
      batch_size: int
      resume_iteration: int, 0 starts from scratch
      stop_iteration: int, iteration at which training stops
      cuda: bool
    """

    # Arguments & parameters
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base = args.freeze_base
    loss_type = args.loss_type
    augmentation = args.augmentation
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    resume_iteration = args.resume_iteration
    stop_iteration = args.stop_iteration
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    filename = args.filename
    num_workers = 8

    # Fix: these audio / model hyper-parameters were referenced at Model(...)
    # below but never defined in this function (NameError). Read them from the
    # project-level config module, mirroring the other train() entry points in
    # this file. TODO(review): confirm `config` exposes all of these attributes.
    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    classes_num = config.classes_num

    loss_func = get_loss_func(loss_type)
    pretrain = True if pretrained_checkpoint_path else False

    # Paths
    hdf5_path = os.path.join(workspace, 'features', 'waveform.h5')

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size),
        'freeze_base={}'.format(freeze_base))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(workspace, 'statistics', filename,
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size),
        'freeze_base={}'.format(freeze_base), 'statistics.pickle')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename,
        'holdout_fold={}'.format(holdout_fold), model_type,
        'pretrain={}'.format(pretrain), 'loss_type={}'.format(loss_type),
        'augmentation={}'.format(augmentation),
        'batch_size={}'.format(batch_size),
        'freeze_base={}'.format(freeze_base))
    create_logging(logs_dir, 'w')
    logging.info(args)

    if 'cuda' in device:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model class resolved from its name.
    # NOTE(review): eval() on a CLI-supplied string — safe only for trusted input.
    Model = eval(model_type)
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax,
        classes_num, freeze_base)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    if pretrain:
        logging.info(
            'Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)

    if resume_iteration:
        resume_checkpoint_path = os.path.join(
            checkpoints_dir, '{}_iterations.pth'.format(resume_iteration))
        logging.info(
            'Load resume model from {}'.format(resume_checkpoint_path))
        resume_checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(resume_checkpoint['model'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = resume_checkpoint['iteration']
    else:
        iteration = 0

    # Parallel: wrap in DataParallel before moving to device
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    dataset = GtzanDataset()

    # Data generator. With mixup the sampler yields twice the batch so that
    # pairs of clips can be mixed.
    train_sampler = TrainSampler(
        hdf5_path=hdf5_path,
        holdout_fold=holdout_fold,
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size)

    validate_sampler = EvaluateSampler(
        hdf5_path=hdf5_path,
        holdout_fold=holdout_fold,
        batch_size=batch_size)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=train_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    validate_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=validate_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    if 'cuda' in device:
        model.to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
        betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)

    # Evaluator
    evaluator = Evaluator(model=model)

    train_bgn_time = time.time()

    # Train on mini batches
    for batch_data_dict in train_loader:

        # Evaluate every 200 iterations
        if iteration % 200 == 0 and iteration > 0:
            if resume_iteration > 0 and iteration == resume_iteration:
                # Skip re-evaluating the iteration we just resumed from.
                pass
            else:
                logging.info('------------------------------------')
                logging.info('Iteration: {}'.format(iteration))

                train_fin_time = time.time()

                statistics = evaluator.evaluate(validate_loader)
                logging.info('Validate accuracy: {:.3f}'.format(
                    statistics['accuracy']))

                statistics_container.append(iteration, statistics, 'validate')
                statistics_container.dump()

                train_time = train_fin_time - train_bgn_time
                validate_time = time.time() - train_fin_time

                logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                    ''.format(train_time, validate_time))

                train_bgn_time = time.time()

        # Save model every 2000 iterations.
        # NOTE(review): optimizer state is not checkpointed, so a resumed run
        # restarts Adam's moment estimates — confirm this is intended.
        if iteration % 2000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Draw per-pair mixing coefficients for this batch
        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                len(batch_data_dict['waveform']))

        # Move data to GPU
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device)

        # Train
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            # Mix the targets with the same lambdas used on the waveforms
            batch_target_dict = {'target': do_mixup(batch_data_dict['target'],
                batch_data_dict['mixup_lambda'])}
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # loss
        loss = loss_func(batch_output_dict, batch_target_dict)
        print(iteration, loss)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == stop_iteration:
            break

        iteration += 1