def process_dataset(dataset_dir, config, df):
    """Write every split of the dataset to sharded TFRecord files.

    For each of the 'train', 'val' and 'test' splits, the rows of `df` are
    grouped by their `shard` value and each group is serialized into one
    file named `<split>-<shard+1>-<num_shards>.tfrecord` under
    `<dataset_dir>/tfrecord`.

    Args:
        dataset_dir: Root directory of the dataset.
        config: Dataset configuration, forwarded untouched to
            `process_audio`.
        df: DataFrame with (at least) `split` and `shard` columns; each row
            is handed to `process_audio` as part of a
            `(row, config, split)` tuple.
    """
    # Make sure the output directory exists.
    os.makedirs(mkpath(dataset_dir, 'tfrecord'), exist_ok=True)

    # processes=None lets the pool start one worker per CPU.
    with Pool(processes=None) as pool:
        for split in ['train', 'val', 'test']:
            print(f'=> Processing split "{split}".')
            rows_in_split = df[df['split'] == split]
            shard_ids = rows_in_split.shard.unique()
            num_shards = len(shard_ids)

            for shard in sorted(shard_ids):
                rows_in_shard = rows_in_split[rows_in_split['shard'] == shard]
                record_name = f'{split}-{shard + 1:04d}-{num_shards:04d}.tfrecord'
                record_path = mkpath(dataset_dir, 'tfrecord', record_name)

                with tf.python_io.TFRecordWriter(record_path) as writer:
                    jobs = [(row, config, split)
                            for _, row in rows_in_shard.iterrows()]
                    num_jobs = len(jobs)

                    # imap yields results in submission order, so the
                    # records keep the row order of the shard.
                    results = pool.imap(process_audio, jobs)
                    for done, examples in enumerate(results, start=1):
                        for example in examples:
                            writer.write(example.SerializeToString())

                        percent = int(round(done / num_jobs * 100))
                        print(
                            f'\rShard ({shard+1:04d}/{num_shards:04d}): {percent:3d}%',
                            end='', flush=True)

                # Terminate the in-place progress line of this shard.
                print()
def main(args):
    """Evaluate a saved model on the test split of its dataset.

    Args:
        args: Parsed arguments providing `model_path`, `dataset`,
            `data_dir`, `batch_size` and `num_readers`. `model_path` is
            normalized in place, and `dataset` is filled in when it is
            empty.

    Raises:
        Exception: If the dataset name is not 'mtt', 'scd' or 'dcs'.
    """
    args.model_path = mkpath(args.model_path)

    # No dataset given: take the second '-'-separated field of the
    # directory that contains the model file.
    if not args.dataset:
        train_dir_name = args.model_path.split('/')[-2]
        args.dataset = train_dir_name.split('-')[1]

    dataset_name = args.dataset
    if dataset_name == 'mtt':
        config, classes = MTT_CONFIG, data.mtt.CLASSES
    elif dataset_name == 'scd':
        config, classes = SCD_CONFIG, data.scd.CLASSES
    elif dataset_name == 'dcs':
        config, classes = DCS_CONFIG, data.dcs.CLASSES
    else:
        raise Exception('Not implemented.')

    # Only the test dataset is needed for evaluation.
    tfrecord_dir = mkpath(args.data_dir, args.dataset, 'tfrecord')
    dataset_test = create_datasets(
        tfrecord_dir, args.batch_size, args.num_readers, config,
        only_test=True)

    # Restore the trained model, registering the custom objects it was
    # saved with.
    custom_objects = {'AudioVarianceScaling': AudioVarianceScaling, 'tf': tf}
    model = tf.keras.models.load_model(
        args.model_path, custom_objects=custom_objects)

    evaluate(model, dataset_test, config, classes=classes)
def main(args):
    """Train a SampleCNN in stages and report the scores of the best model.

    Args:
        args: Parsed arguments. Reads `dataset`, `data_dir`, `log_dir`,
            `name`, `block`, `amplifying_ratio`, `multi`, `dropout`,
            `batch_size`, `num_readers`, `num_stages`, `lr` and `lr_decay`;
            sets `train_dir` and, when it is None, `name`.

    Returns:
        The tuple `(rocauc, prauc, acc, f1)` returned by the last call to
        `evaluate`.

    Raises:
        Exception: If the dataset name is not 'mtt', 'scd' or 'dcs'.
        OSError: If the training directory already exists.
    """
    print(f'=> Dataset: {args.dataset}')

    dataset_name = args.dataset
    if dataset_name == 'mtt':
        config = MTT_CONFIG
    elif dataset_name == 'scd':
        config = SCD_CONFIG
    elif dataset_name == 'dcs':
        config = DCS_CONFIG
    else:
        raise Exception(f'Not implemented dataset: {args.dataset}')

    dataset_path = mkpath(args.data_dir, args.dataset)
    tfrecord_path = f'{dataset_path}/tfrecord'

    # Configure the model.
    model_config = ModelConfig(
        block=args.block,
        amplifying_ratio=args.amplifying_ratio,
        multi=args.multi,
        num_blocks=config.num_blocks,
        dropout=args.dropout,
        activation=config.activation,
        num_classes=config.num_classes)

    # Training directory: <log_dir>/<timestamp>-<dataset>-<name>.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    args.train_dir = mkpath(args.log_dir, timestamp + f'-{args.dataset}')
    if args.name is None:
        args.name = model_config.get_signature()
    args.train_dir += '-' + args.name
    # exist_ok=False: never reuse a directory from a previous run.
    os.makedirs(args.train_dir, exist_ok=False)
    print('=> Training directory: ' + args.train_dir)

    # Create training, validation, and test datasets.
    dataset_train, dataset_val, dataset_test = create_datasets(
        tfrecord_path, args.batch_size, args.num_readers, config)

    model = SampleCNN(model_config)
    model_config.print_summary()
    num_params = int(sum(K.count_params(weight)
                         for weight in set(model.trainable_weights)))
    print(f'=> #params: {num_params:,}')

    best_checkpoint = f'{args.train_dir}/best.h5'
    custom_objects = {'AudioVarianceScaling': AudioVarianceScaling, 'tf': tf}

    for stage in range(args.num_stages):
        print(f'=> Stage {stage}')

        # The learning rate decays geometrically with the stage index.
        lr = args.lr * (args.lr_decay ** stage)
        train(model, lr, dataset_train, dataset_val, config, args)

        # Continue from the best checkpoint saved so far.
        model = tf.keras.models.load_model(
            best_checkpoint, custom_objects=custom_objects)

        # Scores of the last stage are the ones reported below.
        rocauc, prauc, acc, f1 = evaluate(model, dataset_test, config)

    # Embed the scores in the file name of the best checkpoint.
    os.rename(
        best_checkpoint,
        f'{args.train_dir}/final-auc_{rocauc:.6f}-acc_{acc:.6f}-f1_{f1:.6f}.h5')

    # Report the final scores.
    print(f'=> FINAL SCORES [{args.dataset}] {args.name}: '
          f'rocauc={rocauc:.6f}, acc={acc:.6f}, f1={f1:.6f}, prauc={prauc:.6f}')
    model_config.print_summary()

    return rocauc, prauc, acc, f1
def make_dataset_info(dataset_dir, num_audios_per_shard):
    """Build the table of audio files with their labels, splits and shards.

    The weak-label training and testing CSVs are merged, and 10% of the
    rows of every class are sampled (with a fixed seed) to pick the files
    of the validation split; the remaining files form the training split.
    The evaluation CSV becomes the test split. Labels are then collected
    per file and passed through `encode`, so the result has one row per
    file.

    Args:
        dataset_dir: Root directory of the dataset; CSVs and audio files
            are looked up under its `raw` subdirectory.
        num_audios_per_shard: Target number of audios in one shard.

    Returns:
        A DataFrame with one row per file, including the columns `file`,
        `path`, `split` ('train', 'val' or 'test'), `label` (encoded) and
        `shard` (int).
    """
    df_train = read_csv(
        mkpath(dataset_dir, 'raw/groundtruth_weak_label_training_set.csv'))
    df_test = read_csv(
        mkpath(dataset_dir, 'raw/groundtruth_weak_label_testing_set.csv'))
    df_eval = read_csv(
        mkpath(dataset_dir, 'raw/groundtruth_weak_label_evaluation_set.csv'))

    # Audio files on disk carry a 'Y' prefix in front of the CSV file name.
    df_train['path'] = [
        mkpath(dataset_dir, f'raw/{DIR_TRAIN}/Y{f}') for f in df_train['file']
    ]
    df_test['path'] = [
        mkpath(dataset_dir, f'raw/{DIR_TEST}/Y{f}') for f in df_test['file']
    ]
    df_eval['path'] = [
        mkpath(dataset_dir, f'raw/{DIR_EVAL}/Y{f}') for f in df_eval['file']
    ]

    # The official testing set is used for training as well.
    df_train = pd.concat([df_train, df_test])

    # Split validation set: sample 10% of the rows of each class.
    val_files = []
    for c in CLASSES:
        df_class = df_train[df_train['label'] == c]
        val_files += df_class.sample(frac=0.1, random_state=123)['file'].tolist()
    # A file may be sampled for several classes; keep each file once.
    val_files = list(set(val_files))

    is_val = df_train['file'].isin(val_files)
    df_val = df_train[is_val].assign(split='val')
    df_train = df_train[~is_val].assign(split='train')
    df_eval = df_eval.assign(split='test')

    df = pd.concat([df_train, df_val, df_eval])

    # Encode labels: gather all labels of a file, then encode them at once.
    label = df.groupby('file')['label'].apply(list)
    label.iloc[:] = [encode(labels) for labels in label]
    label = label.to_frame().reset_index()
    df = df.drop_duplicates('file').drop('label', axis=1).merge(label, on='file')

    # Shuffle and shard.
    df = shuffle(df, random_state=123)
    for split in ['train', 'val', 'test']:
        in_split = df['split'] == split
        num_audios = int(in_split.sum())
        num_shards = num_audios // num_audios_per_shard

        if num_shards == 0:
            # Fewer audios than one full shard: keep them all in shard 0
            # instead of computing a modulo by zero below.
            shards = np.zeros(num_audios, dtype=int)
        else:
            num_remainders = num_audios % num_audios_per_shard
            # Every shard id appears num_audios_per_shard times ...
            shards = np.tile(np.arange(num_shards), num_audios_per_shard)
            # ... and the leftover audios are spread over the first shards.
            shards = np.concatenate(
                [shards, np.arange(num_remainders) % num_shards])

        # NOTE: this uses NumPy's global RNG, so the shard assignment is
        # only reproducible if the caller seeds np.random beforehand.
        shards = np.random.permutation(shards)
        df.loc[in_split, 'shard'] = shards

    df['shard'] = df['shard'].astype(int)

    return df
def main(args):
    """Build the TFRecord files of the dataset named by `args.dataset`.

    Args:
        args: Parsed arguments providing `data_dir` and `dataset`.

    Raises:
        Exception: If the dataset name is not 'mtt', 'scd' or 'dcs'.
    """
    dataset_dir = mkpath(args.data_dir, args.dataset)

    # Pick the configuration and the dataset module that knows how to
    # describe the raw files.
    dataset_name = args.dataset
    if dataset_name == 'mtt':
        config, dataset_module = data.config.MTT_CONFIG, data.mtt
    elif dataset_name == 'scd':
        config, dataset_module = data.config.SCD_CONFIG, data.scd
    elif dataset_name == 'dcs':
        config, dataset_module = data.config.DCS_CONFIG, data.dcs
    else:
        raise Exception('Not implemented dataset: ' + args.dataset)

    df = dataset_module.make_dataset_info(
        dataset_dir, config.num_audios_per_shard)

    process_dataset(dataset_dir, config, df)
def load_audio_paths(dataset_dir):
    """Collect the sorted audio paths of the train, val and test splits.

    Validation and test paths come from `raw/validation_list.txt` and
    `raw/testing_list.txt`. The training split is every `raw/*/*.wav` file
    that is neither listed there nor located in `raw/_background_noise_`.

    Args:
        dataset_dir: Root directory of the dataset.

    Returns:
        A tuple `(train_paths, val_paths, test_paths)` of sorted lists.
    """
    def read_listed_paths(list_file):
        # Each line of the list file is a path relative to the raw directory.
        with open(mkpath(dataset_dir, list_file)) as f:
            relative_paths = f.read().splitlines()
        return [mkpath(dataset_dir, 'raw', rel) for rel in relative_paths]

    all_wavs = glob(mkpath(dataset_dir, 'raw/*/*.wav'))
    noise_wavs = glob(mkpath(dataset_dir, 'raw/_background_noise_/*.wav'))

    val_paths = read_listed_paths('raw/validation_list.txt')
    test_paths = read_listed_paths('raw/testing_list.txt')

    # Remove validation, test set, and noises from the training set.
    remaining = set(all_wavs) - set(val_paths) - set(test_paths) - set(noise_wavs)

    train_paths = sorted(remaining)
    val_paths = sorted(val_paths)
    test_paths = sorted(test_paths)

    return train_paths, val_paths, test_paths