def calculate_scalar(args):
    '''Compute and store the mean / std of the 'feature_side' features.

    The 'feature_side' dataset is read from the feature hdf5 file, its items
    are joined along the first axis, and the statistics returned by
    calculate_scalar_of_tensor() are written to the matching scalar hdf5 file
    as the datasets 'mean' and 'std'.

    Args:
      args: namespace providing
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: 'train'
        mini_data: bool, set True for debugging on a small part of data
    '''

    # Settings taken from the command line and from the config module
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # The feature file and the scalar file share one naming scheme
    prefix = 'minidata_' if mini_data else ''
    sub_dir = get_subdir(subtask, data_type)
    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    h5_name = '{}.h5'.format(sub_dir)

    feature_path = os.path.join(
        workspace, 'features_side', feature_tag, h5_name)
    scalar_path = os.path.join(
        workspace, 'scalars_side', feature_tag, h5_name)
    create_folder(os.path.dirname(scalar_path))

    # Read every stored feature into memory
    load_bgn_time = time.time()     # recorded only, never reported
    with h5py.File(feature_path, 'r') as feature_file:
        stacked = feature_file['feature_side'][:]

    # Join the per-item arrays along their first axis, then take statistics
    features = np.concatenate(stacked, axis=0)
    mean, std = calculate_scalar_of_tensor(features)

    with h5py.File(scalar_path, 'w') as scalar_file:
        for (name, value) in (('mean', mean), ('std', std)):
            scalar_file.create_dataset(name, data=value, dtype=np.float32)

    print('All features: {}'.format(features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
def _load_stat(model_type, subtask, source):
    '''Load the validation statistics of one model and summarise accuracy.

    The names `workspace`, `filename`, `prefix`, `frames_per_second`,
    `mel_bins`, `data_type` and `holdout_fold` are not parameters; they must
    be resolvable from the enclosing scope.

    Args:
      model_type: string, name of the model directory; also used as legend
      subtask: 'a' | 'b' | 'c'
      source: key into the pickled statistics dict

    Returns:
      dict with 'accuracy' (mean over the last axis of the stored accuracy
      values, one entry per stored statistic) and 'legend'. For subtask 'c'
      the last column is excluded from that mean and returned separately as
      'unknown_accuracy'.

    Raises:
      ValueError: if subtask is not 'a', 'b' or 'c'.
    '''
    sub_dir = get_subdir(subtask, data_type)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(
            prefix, frames_per_second, mel_bins),
        '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold),
        model_type, 'validate_statistics.pickle')

    # Use a context manager so the file handle is closed even if unpickling
    # fails.
    # NOTE(security): unpickling can execute arbitrary code; only load
    # statistics files written by this project.
    with open(validate_statistics_path, 'rb') as statistics_file:
        validate_statistics_dict = cPickle.load(statistics_file)

    accuracy_matrix = np.array(
        [stat['accuracy'] for stat in validate_statistics_dict[source]])

    legend = '{}'.format(model_type)

    if subtask in ['a', 'b']:
        accuracy = np.mean(accuracy_matrix, axis=-1)
        results = {'accuracy': accuracy, 'legend': legend}

        print('Subtask: {}, Source: {}, Model: {} accuracy: {:.3f}'.format(
            subtask, source, model_type, accuracy[-1]))

    elif subtask == 'c':
        # The last column is reported on its own as the "unknown" accuracy
        accuracy = np.mean(accuracy_matrix[:, 0:-1], axis=-1)
        unknown_accuracy = accuracy_matrix[:, -1]
        results = {
            'accuracy': accuracy,
            'unknown_accuracy': unknown_accuracy,
            'legend': legend
        }

        print('Subtask: {}, Source: {}, Model: {}, accuracy: {:.3f}, '
              'Unknown accuracy: {:.3f}'.format(subtask, source, model_type,
                                                accuracy[-1],
                                                unknown_accuracy[-1]))

    else:
        # Previously this fell through to an unbound `results`
        raise ValueError('Incorrect subtask: {}'.format(subtask))

    return results
def load_imgsrc_local(self, img_dir):
    """Record one local image path per sub-directory of ``img_dir``.

    For every directory returned by ``get_subdir(img_dir)`` the regular files
    whose text after the last '.' is listed in ``config.image_format`` are
    collected. When at least one exists, the first is stored in
    ``self.img_path`` at the index given by the second-to-last path component
    (parsed as an int) minus one.

    Does nothing except print a message when ``img_dir`` is not a directory.
    """
    print("为timecard加载图片超链接")

    # Guard clause: nothing to scan
    if not os.path.isdir(img_dir):
        print("文件夹不存在 %s" % img_dir)
        return

    from utilities import get_subdir
    from config import image_format

    for folder in get_subdir(img_dir):
        images = []
        for entry in os.listdir(folder):
            candidate = os.path.join(folder, entry)
            if not os.path.isfile(candidate):
                continue
            # The extension test runs on the joined path, not the bare name
            if candidate.rsplit('.', 1)[-1] in image_format:
                images.append(candidate)

        if not images:
            continue

        # NOTE(review): index [-2] presumably relies on the layout of the
        # paths get_subdir() returns - confirm against that helper
        weibo_title = int(folder.split(os.path.sep)[-2])
        self.img_path[weibo_title - 1] = images[0]
def calculate_feature_for_all_audio_files(args):
    '''Calculate feature of audio files and write out features to a hdf5 file.

    Every audio file listed in the metadata is read with read_side_audio(),
    padded or truncated to config.total_samples, converted with
    LogMelExtractor and stored in the 'feature_side' dataset. The metadata
    columns are stored next to it as byte strings.

    Args:
      args: namespace providing
        dataset_dir: string
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: 'development' | 'evaluation'
        mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    frames_per_second = config.frames_per_second
    frames_num = config.frames_num
    total_samples = config.total_samples

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    metadata_path = os.path.join(dataset_dir, sub_dir, 'meta.csv')
    audios_dir = os.path.join(dataset_dir, sub_dir, 'audio')

    feature_path = os.path.join(
        workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(
            prefix, frames_per_second, mel_bins),
        '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(feature_path))

    # Feature extractor
    feature_extractor = LogMelExtractor(sample_rate=sample_rate,
                                        window_size=window_size,
                                        hop_size=hop_size,
                                        mel_bins=mel_bins,
                                        fmin=fmin,
                                        fmax=fmax)

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # Optional metadata columns and the byte-string dtype they are stored with
    optional_columns = (('scene_label', 'S24'),
                        ('identifier', 'S24'),
                        ('source_label', 'S8'))

    # Keep only a small, reproducible random subset when debugging
    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['audio_name'])
        random_state = np.random.RandomState(1234)
        # Cap the sample size so datasets with fewer files still work
        indexes = random_state.choice(
            total_num, size=min(mini_num, total_num), replace=False)
        # Only sub-sample columns that exist; the label columns are treated
        # as optional when they are written below
        for key in ('audio_name',) + tuple(
                name for (name, _) in optional_columns):
            if key in meta_dict:
                meta_dict[key] = meta_dict[key][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()

    # Hdf5 file for storing features and targets. The context manager closes
    # the file even when reading or transforming an audio file fails.
    with h5py.File(feature_path, 'w') as hf:
        hf.create_dataset(
            name='audio_name',
            data=[audio_name.encode()
                  for audio_name in meta_dict['audio_name']],
            dtype='S80')

        for (column, dtype) in optional_columns:
            if column in meta_dict:
                hf.create_dataset(
                    name=column,
                    data=[value.encode() for value in meta_dict[column]],
                    dtype=dtype)

        # Starts empty and grows by one row per audio file
        hf.create_dataset(name='feature_side',
                          shape=(0, frames_num, mel_bins),
                          maxshape=(None, frames_num, mel_bins),
                          dtype=np.float32)

        for (n, audio_name) in enumerate(meta_dict['audio_name']):
            audio_path = os.path.join(audios_dir, audio_name)
            print(n, audio_path)

            # Read audio
            (audio, _) = read_side_audio(audio_path=audio_path,
                                         target_fs=sample_rate)

            # Pad or truncate audio recording to the same length
            audio = pad_truncate_sequence(audio, total_samples)

            # Extract feature
            feature = feature_extractor.transform(audio)

            # Remove the extra log mel spectrogram frames caused by padding
            # zero
            feature = feature[0:frames_num]

            hf['feature_side'].resize((n + 1, frames_num, mel_bins))
            hf['feature_side'][n] = feature

    print('Write hdf5 file to {} using {:.3f} s'.format(
        feature_path,
        time.time() - extract_time))
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file.

    The checkpoint and the scalar are looked up under the 'development'
    sub-directory with holdout_fold fixed to 'none'; the features and the
    submission file use the sub-directory of `data_type`.

    Args:
      args: namespace providing
        subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
        data_type: 'leaderboard' | 'evaluation'
        workspace: string, directory of workspace
        model_type: string, e.g. 'Cnn_9layers'
        iteration: int, selects the '<iteration>_iterations.pth' checkpoint
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        filename: string, sub-directory of the checkpoints and logs
        mode: string, sub-directory of the logs

    Raises:
      ValueError: if subtask is not 'a', 'b' or 'c'.
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')

    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    feature_hdf5_path = os.path.join(
        workspace, 'features', feature_tag, '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars', feature_tag, '{}.h5'.format(trained_sub_dir))

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, feature_tag,
        '{}'.format(trained_sub_dir), fold_tag, model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', feature_tag, sub_dir, fold_tag,
        model_type, '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, feature_tag,
        '{}'.format(sub_dir), fold_tag, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(security): eval() turns a string from args into a class; only
    # pass model names that come from a trusted command line.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
    else:
        # Previously `model` was left unbound for any other value
        raise ValueError('Incorrect subtask: {}'.format(subtask))

    # Map the stored tensors to CPU first so a checkpoint written on a GPU
    # host also loads on a CPU-only host; the model is moved below if needed.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda,
                          return_input=False, return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
def inference_validation(args):
    '''Inference and calculate metrics on validation data.

    Args:
      args: namespace providing
        dataset_dir: string, directory of dataset
        subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
        data_type: 'development'
        workspace: string, directory of workspace
        model_type: string, e.g. 'Cnn_9layers'
        holdout_fold: value used in the 'holdout_fold=<value>' directory name
          and passed to the data generator
        iteration: int, selects the '<iteration>_iterations.pth' checkpoint
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        visualize: bool, also call evaluator.visualize() for source 'a'
        filename: string, sub-directory of the checkpoints and logs
        mode: string, sub-directory of the logs

    Raises:
      ValueError: if subtask is not 'a', 'b' or 'c'.
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    train_csv = os.path.join(
        dataset_dir, sub_dir, 'evaluation_setup', 'fold1_train.csv')

    validate_csv = os.path.join(
        dataset_dir, sub_dir, 'evaluation_setup', 'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features', feature_tag, '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars', feature_tag, '{}.h5'.format(sub_dir))

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, feature_tag,
        '{}'.format(sub_dir), fold_tag, model_type,
        '{}_iterations.pth'.format(iteration))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, feature_tag,
        '{}'.format(sub_dir), fold_tag, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(security): eval() turns a string from args into a class; only
    # pass model names that come from a trusted command line.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
    else:
        # Previously `model` was left unbound for any other value
        raise ValueError('Incorrect subtask: {}'.format(subtask))

    # Restore the trained weights. This step had been commented out, which
    # made the function score a randomly initialised model and left
    # `checkpoint_path` / `iteration` unused. Tensors are mapped to CPU first
    # so a checkpoint saved on a GPU host loads everywhere.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        subtask=subtask,
        cuda=cuda)

    # Subtask 'b' is scored on three sources, the other subtasks on 'a' only
    if subtask in ['a', 'c']:
        evaluator.evaluate(data_type='validate', source='a', verbose=True)

    elif subtask == 'b':
        evaluator.evaluate(data_type='validate', source='a', verbose=True)
        evaluator.evaluate(data_type='validate', source='b', verbose=True)
        evaluator.evaluate(data_type='validate', source='c', verbose=True)

    # Visualize log mel spectrogram
    if visualize:
        evaluator.visualize(data_type='validate', source='a')
def train(args):
    '''Training. Model will be saved after several iterations.

    Trains with mixup on four input streams ('feature', 'feature_gamm',
    'feature_mfcc', 'feature_panns'). Every 200 iterations the learning rate
    is multiplied by 0.93 and, once past `ite_eva` / `ite_store`, the model
    is evaluated / checkpointed.

    Args:
      args: namespace providing
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
        data_type: 'development' | 'evaluation'
        holdout_fold: '1' | 'none', set 1 for development and none for
          training on all data without validation
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        filename: string, sub-directory of checkpoints, statistics and logs
        mode: string, sub-directory of the logs
        fixed: the string 'True' builds the model with fixed=True (subtasks
          'a' / 'b') and optimises only parameters with requires_grad set
        finetune: the string 'True' loads pretrained weights before training
        ite_train: int, iteration at which training stops
        ite_eva: int, evaluation starts after this iteration
        ite_store: int, checkpointing starts after this iteration

    Raises:
      ValueError: if subtask is not 'a', 'b' or 'c'.
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    # Both flags arrive as strings; only the exact text 'True' enables them
    is_fixed = args.fixed == 'True'
    is_finetune = args.finetune == 'True'
    ite_train = args.ite_train
    ite_eva = args.ite_eva
    ite_store = args.ite_store

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None      # Number of mini-batches to evaluate on training data
    reduce_lr = True

    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv')

    validate_csv = os.path.join(
        dataset_dir, sub_dir, 'evaluation_setup', 'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features', feature_tag, '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars', feature_tag, '{}.h5'.format(sub_dir))

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename, feature_tag,
        '{}'.format(sub_dir), fold_tag, model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename, feature_tag,
        '{}'.format(sub_dir), fold_tag, model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, feature_tag,
        '{}'.format(sub_dir), fold_tag, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(security): eval() turns a string from args into a class; only
    # pass model names that come from a trusted command line.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax',
                      fixed=is_fixed)
        loss_func = nll_loss

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    else:
        # Previously `model` was left unbound for any other value
        raise ValueError('Incorrect subtask: {}'.format(subtask))

    if cuda:
        model.cuda()

    # Optimizer. With fixed layers only the trainable parameters are passed.
    if is_fixed:
        trainable_params = filter(
            lambda p: p.requires_grad, model.parameters())
    else:
        trainable_params = model.parameters()

    optimizer = optim.Adam(trainable_params, lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0., amsgrad=True)

    if is_finetune:
        # NOTE(review): machine-specific absolute path to a pretrained
        # checkpoint of the same model type
        model_path = ('/home/cdd/code2/dcase2020_task1/workspace/checkpoints/'
                      'main/logmel_86frames_40melbins/'
                      'TAU-urban-acoustic-scenes-2020-mobile-development/'
                      'holdout_fold=1/' + model_type + '/2000_iterations.pth')
        # Follow the resolved `cuda` flag; mapping to 'cuda' unconditionally
        # made fine-tuning fail on hosts without a GPU
        device = torch.device('cuda' if cuda else 'cpu')
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model'])

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        subtask=subtask,
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0 and iteration > ite_eva:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            # The statistics on training data are not stored here
            for source in sources_to_evaluate:
                evaluator.evaluate(
                    data_type='train',
                    source=source,
                    max_iteration=max_iteration,
                    verbose=False)

            if holdout_fold != 'none':
                for source in sources_to_evaluate:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate',
                        source=source,
                        max_iteration=max_iteration,
                        verbose=False)

                    validate_statistics_container.append_and_dump(
                        iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 200 == 0 and iteration > ite_store:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.93

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'feature_gamm', 'feature_mfcc',
                       'feature_panns', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train using mixup
        model.train()
        mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(
            x1=batch_data_dict['feature'],
            x2=batch_data_dict['feature_gamm'],
            x3=batch_data_dict['feature_mfcc'],
            x4=batch_data_dict['feature_panns'],
            y=batch_data_dict['target'],
            alpha=0.2)

        # The second value returned by the model is not used
        batch_output, _ = model(mixed_x1, mixed_x2, mixed_x3, mixed_x4)

        if batch_output.shape[1] == 10:
            # single scale models
            loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam)
        else:
            # multi scale models
            # NOTE(review): the last slice along axis 1 is left out of the
            # loss - confirm that this is intended
            losses = []
            for ite in range(batch_output.shape[1] - 1):
                loss = mixup_criterion(
                    loss_func, batch_output[:, ite, :], y_a, y_b, lam)
                losses.append(loss)
            loss = sum(losses)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == ite_train:
            break

        iteration += 1
def train(args):
    '''Train a four-stream model and checkpoint it periodically.

    Each training step feeds the 'feature', 'feature_left', 'feature_right'
    and 'feature_side' batches to the model. Every 200 iterations the model
    is evaluated on train and validate data and (after iteration 0) the
    learning rate is multiplied by 0.9; every 1000 iterations (after
    iteration 0) a checkpoint is written. Training stops at iteration 15000.

    Args:
      args: namespace providing
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
        data_type: 'development' | 'evaluation'
        holdout_fold: '1' | 'none', used in the output directory names
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        filename: string, sub-directory of checkpoints, statistics and logs
        mode: string, sub-directory of the logs
    '''

    # Command line settings
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    # Fixed settings
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None      # Number of mini-batches to evaluate on training data
    reduce_lr = True

    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1

    # Name fragments shared by every path below
    prefix = 'minidata_' if mini_data else ''
    sub_dir = get_subdir(subtask, data_type)
    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    h5_name = '{}.h5'.format(sub_dir)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)
    setup_dir = os.path.join(dataset_dir, sub_dir, 'evaluation_setup')

    train_csv = os.path.join(setup_dir, 'fold1_train.csv')
    validate_csv = os.path.join(setup_dir, 'fold1_evaluate.csv')

    # One feature file and one scalar file per input stream
    feature_hdf5_path = os.path.join(
        workspace, 'features', feature_tag, h5_name)
    feature_hdf5_path_left = os.path.join(
        workspace, 'features_left', feature_tag, h5_name)
    feature_hdf5_path_right = os.path.join(
        workspace, 'features_right', feature_tag, h5_name)
    feature_hdf5_path_side = os.path.join(
        workspace, 'features_side', feature_tag, h5_name)

    scalar_path = os.path.join(
        workspace, 'scalars', feature_tag, h5_name)
    scalar_path_left = os.path.join(
        workspace, 'scalars_left', feature_tag, h5_name)
    scalar_path_right = os.path.join(
        workspace, 'scalars_right', feature_tag, h5_name)
    scalar_path_side = os.path.join(
        workspace, 'scalars_side', feature_tag, h5_name)

    # Output locations
    run_tail = (feature_tag, '{}'.format(sub_dir), fold_tag, model_type)

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename, *run_tail)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename, *run_tail,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, *run_tail)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Normalisation statistics of the four streams
    scalar = load_scalar(scalar_path)
    scalar_left = load_scalar(scalar_path_left)
    scalar_right = load_scalar(scalar_path_right)
    scalar_side = load_scalar(scalar_path_side)

    # Model class is looked up by name
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0., amsgrad=True)

    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        feature_hdf5_path_left=feature_hdf5_path_left,
        feature_hdf5_path_right=feature_hdf5_path_right,
        feature_hdf5_path_side=feature_hdf5_path_side,
        train_csv=train_csv,
        validate_csv=validate_csv,
        scalar=scalar,
        scalar_left=scalar_left,
        scalar_right=scalar_right,
        scalar_side=scalar_side,
        batch_size=batch_size)

    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          subtask=subtask,
                          cuda=cuda)

    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()

    # Train on mini batches; the generator yields one dict per stream
    for iteration, batches in enumerate(data_generator.generate_train()):
        (batch_data_dict, batch_data_dict_left,
         batch_data_dict_right, batch_data_dict_side) = batches

        on_200th = iteration % 200 == 0

        # Periodic evaluation
        if on_200th:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                train_statistics = evaluator.evaluate(
                    data_type='train', source=source,
                    max_iteration=max_iteration, verbose=False)

            for source in sources_to_evaluate:
                validate_statistics = evaluator.evaluate(
                    data_type='validate', source=source,
                    max_iteration=max_iteration, verbose=False)

                validate_statistics_container.append_and_dump(
                    iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'.format(
                    train_time, validate_time))

            train_bgn_time = time.time()

        # Periodic checkpoint
        if iteration > 0 and iteration % 1000 == 0:
            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(
                {'iteration': iteration,
                 'model': model.state_dict(),
                 'optimizer': optimizer.state_dict()},
                checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Learning rate decay
        if reduce_lr and on_200th and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move the feature of each stream and its target to the GPU
        stream_batches = (
            (batch_data_dict, 'feature'),
            (batch_data_dict_left, 'feature_left'),
            (batch_data_dict_right, 'feature_right'),
            (batch_data_dict_side, 'feature_side'),
        )
        for (stream_batch, feature_key) in stream_batches:
            for key in stream_batch.keys():
                if key in [feature_key, 'target']:
                    stream_batch[key] = move_data_to_gpu(
                        stream_batch[key], cuda)

        # Forward pass; the target of the first stream is the loss target
        model.train()
        batch_output = model(
            data=batch_data_dict['feature'],
            data_left=batch_data_dict_left['feature_left'],
            data_right=batch_data_dict_right['feature_right'],
            data_side=batch_data_dict_side['feature_side'])

        loss = loss_func(batch_output, batch_data_dict['target'])

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 15000:
            break
def calculate_feature_for_all_audio_files(args):
    '''Calculate feature of audio files and write out features to a hdf5 file.

    Four datasets are written per audio file: 'feature' (the waveform padded
    or truncated to config.total_samples), 'feature_gamm' (output of
    gtg_in_dB, transposed), 'feature_mfcc' (librosa MFCC, transposed) and
    'feature_panns' (mono waveform loaded at 32 kHz, 320000 samples).

    Args:
      args: namespace providing the values below. An attribute that is
        missing (or args=None) falls back to the development default that
        used to be hard-coded here.
        dataset_dir: string
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: 'development' | 'leaderboard'
        mini_data: bool, set True for debugging on a small part of data

    Raises:
      Exception: if data_type is neither 'development' nor 'leaderboard'.
    '''

    # Arguments & parameters. The values used to be overridden by fixed
    # machine-specific constants; they now come from args again and the
    # constants only serve as fallback.
    dataset_dir = getattr(args, 'dataset_dir', 'D:/Project/DCASE_test/Data')
    workspace = getattr(args, 'workspace', 'D:/Project/DCASE_test')
    subtask = getattr(args, 'subtask', 'a')
    data_type = getattr(args, 'data_type', 'development')
    mini_data = getattr(args, 'mini_data', False)

    sample_rate = config.sample_rate
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    total_samples = config.total_samples
    mfcc_frames = config.mfcc_frames
    n_mfcc = config.n_mfcc
    mfcc_hop_size = config.mfcc_hop_size
    gamm_frames = config.gamm_frames
    n_gamm = config.n_gamm

    # Sample rate and row length of the 'feature_panns' dataset
    panns_sample_rate = 32000
    panns_samples = 320000

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    audios_dir = os.path.join(dataset_dir, sub_dir, 'audio')

    if data_type == 'development':
        metadata_path = os.path.join(dataset_dir, sub_dir, 'meta.csv')
    elif data_type == 'leaderboard':
        metadata_path = os.path.join(
            dataset_dir, sub_dir, 'evaluation_setup', 'test.csv')
    else:
        raise Exception('Incorrect data_type!')

    feature_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(
            prefix, frames_per_second, mel_bins),
        '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(feature_path))

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # Keep only a small, reproducible random subset when debugging
    if mini_data:
        mini_num = 300
        total_num = len(meta_dict['audio_name'])
        random_state = np.random.RandomState(1234)
        # Cap the sample size so datasets with fewer files still work
        indexes = random_state.choice(
            total_num, size=min(mini_num, total_num), replace=False)
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()

    # Hdf5 file for storing features and targets. The context manager closes
    # the file even when processing an audio file fails.
    with h5py.File(feature_path, 'w') as hf:
        hf.create_dataset(
            name='audio_name',
            data=[audio_name.encode()
                  for audio_name in meta_dict['audio_name']],
            dtype='S80')

        # Optional metadata columns and the byte-string dtype they use
        for (column, dtype) in (('scene_label', 'S24'),
                                ('identifier', 'S24'),
                                ('source_label', 'S8')):
            if column in meta_dict:
                hf.create_dataset(
                    name=column,
                    data=[value.encode() for value in meta_dict[column]],
                    dtype=dtype)

        # Per-item shape of every feature dataset; all start empty and grow
        # by one row per audio file
        item_shapes = {
            'feature': (total_samples,),
            'feature_gamm': (gamm_frames, n_gamm),
            'feature_mfcc': (mfcc_frames, n_mfcc),
            'feature_panns': (panns_samples,),
        }
        for (name, item_shape) in item_shapes.items():
            hf.create_dataset(
                name=name,
                shape=(0,) + item_shape,
                maxshape=(None,) + item_shape,
                dtype=np.float32)

        for (n, audio_name) in enumerate(meta_dict['audio_name']):
            audio_path = os.path.join(audios_dir, audio_name)
            print(n, audio_path)

            # Waveform, padded or truncated to the same length. The log mel
            # transform that used to follow was never stored and has been
            # dropped.
            (audio, _) = read_audio(
                audio_path=audio_path, target_fs=sample_rate)
            audio = pad_truncate_sequence(audio, total_samples)

            # Gammatone feature, transposed before it is stored
            (audio_gamm, _) = read_audio_gamm(
                audio_path=audio_path, target_fs=sample_rate)
            fea_gamm, _ = gtg_in_dB(audio_gamm, sample_rate)
            fea_gamm = fea_gamm.transpose(1, 0)

            # MFCC at librosa's default loading sample rate, transposed
            sound, fs = librosa.load(audio_path)
            fea_mfcc = librosa.feature.mfcc(
                y=sound, sr=fs, hop_length=mfcc_hop_size, n_mfcc=n_mfcc)
            fea_mfcc = fea_mfcc.transpose(1, 0)

            # Mono waveform at 32 kHz. Fit it to the fixed row length so a
            # recording of another duration cannot break the write below.
            (waveform, _) = librosa.core.load(
                audio_path, sr=panns_sample_rate, mono=True)
            waveform = pad_truncate_sequence(waveform, panns_samples)

            items = {
                'feature': audio,
                'feature_gamm': fea_gamm,
                'feature_mfcc': fea_mfcc,
                'feature_panns': waveform,
            }
            for (name, item) in items.items():
                hf[name].resize((n + 1,) + item_shapes[name])
                hf[name][n] = item

    print('Write hdf5 file to {} using {:.3f} s'.format(
        feature_path, time.time() - extract_time))
def calculate_scalar(args):
    '''Calculate and write out scalar of features.

    For each of the datasets 'feature', 'feature_gamm', 'feature_mfcc' and
    'feature_panns' the mean and std returned by calculate_scalar_of_tensor()
    are written to the scalar hdf5 file as 'mean' / 'std' with the suffix '',
    '_gamm', '_mfcc' and '_panns' respectively.

    Args:
      args: namespace providing the values below. An attribute that is
        missing (or args=None) falls back to the development default that
        used to be hard-coded here.
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: 'train'
        mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters. The values used to be overridden by fixed
    # machine-specific constants; they now come from args again and the
    # constants only serve as fallback.
    workspace = getattr(args, 'workspace', 'D:/Project/DCASE_test')
    subtask = getattr(args, 'subtask', 'a')
    data_type = getattr(args, 'data_type', 'development')
    mini_data = getattr(args, 'mini_data', False)

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    feature_tag = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)

    feature_path = os.path.join(
        workspace, 'features', feature_tag, '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars', feature_tag, '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(scalar_path))

    # (dataset name, suffix of the output datasets, join items along axis 0).
    # The two waveform datasets are passed on unchanged: the former
    # np.concatenate(x[None, :], axis=0) only produced a copy equal to x.
    dataset_specs = (
        ('feature', '', False),
        ('feature_gamm', '_gamm', True),
        ('feature_mfcc', '_mfcc', True),
        ('feature_panns', '_panns', False),
    )

    # Load and reduce one dataset at a time so that only one of the arrays
    # is held in memory at any moment
    statistics = []
    with h5py.File(feature_path, 'r') as hf:
        for (name, suffix, join_items) in dataset_specs:
            data = hf[name][:]
            if join_items:
                data = np.concatenate(data, axis=0)
            (mean, std) = calculate_scalar_of_tensor(data)
            statistics.append((suffix, mean, std))

    with h5py.File(scalar_path, 'w') as hf:
        for (suffix, mean, std) in statistics:
            hf.create_dataset('mean' + suffix, data=mean, dtype=np.float32)
            hf.create_dataset('std' + suffix, data=std, dtype=np.float32)

    print('Write out scalar to {}'.format(scalar_path))