def main():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k')
    parser.add_argument(
        '-p', '--path', type=str,
        help='path to the parameters.json file',
        default='../')
    args = parser.parse_args()

    print(__doc__)

    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path)

    # Define the augmentations
    augmentations = params['data_augmentations']

    # Initialize AugmentedDataset
    aug_dataset = AugmentedDataset(
        dataset, params['features']['sr'], augmentations)

    # Process all files
    print('Processing ...')
    aug_dataset.process()
    print('Done!')
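# Example invocation (a sketch; the script filename is an assumption, while
# the flags come straight from the argparse definitions above):
#
#   python data_augmentation.py -d UrbanSound8k -p ../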
def main():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k')
    parser.add_argument(
        '-p', '--path', type=str,
        help='path to the parameters.json file',
        default='../')
    args = parser.parse_args()

    print(__doc__)

    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path)

    # Download dataset
    if dataset.check_if_downloaded():
        resp = input(
            '%s dataset was already downloaded. '
            'Do you want to download it again? [y/N] : ' % args.dataset)
        if resp == 'y':
            dataset.download(force_download=True)
    else:
        dataset.download()
    print('Done!')
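# Example invocation (a sketch; the script filename is an assumption):
#
#   python download_dataset.py -d ESC50 -p ../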
def do_features_extraction(status_features, feature_ix, sequence_time,
                           sequence_hop_time, audio_hop, audio_win, sr,
                           specific_parameters, dataset_path, audio_folder,
                           features_folder, dataset_ix):
    global feature_extractor

    if status_features != 'EXTRACTING':
        # return [False, '', 'success', 'True']
        raise dash.exceptions.PreventUpdate

    if feature_ix is None:
        return [True, 'Please select a Feature name', 'danger', 'True']

    if dataset_ix is None:
        return [True, 'Please select a dataset', 'danger', 'True']

    features_name = options_features[feature_ix]['label']
    dataset_name = options_datasets[dataset_ix]['label']

    feature_extractor_class = get_available_features()[features_name]
    specific_parameters = ast.literal_eval(specific_parameters)
    feature_extractor = feature_extractor_class(
        sequence_time=sequence_time,
        sequence_hop_time=sequence_hop_time,
        audio_win=audio_win,
        audio_hop=audio_hop,
        sr=sr,
        **specific_parameters)

    # Get dataset class
    dataset_class = get_available_datasets()[dataset_name]
    dataset = dataset_class(dataset_path)

    if not dataset.check_if_downloaded():
        return [True,
                'Please download the dataset before doing feature extraction',
                'danger', 'True']

    print('Extracting features...')
    feature_extractor.extract(dataset)
    print('Done!')
    return [True, 'Features extracted', 'success', 'True']
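# In the full GUI this function is presumably registered as a Dash callback.
# A sketch of that registration follows; component ids other than
# 'msg_features' (defined later in the layout) are assumptions inferred from
# the argument names, not confirmed by the source:
#
# @app.callback(
#     [Output('msg_features', 'is_open'),
#      Output('msg_features', 'children'),
#      Output('msg_features', 'color'),
#      Output('end_features_extraction', 'data')],
#     [Input('status_features', 'data')],
#     [State('feature_name', 'value'), ...])
# def do_features_extraction(status_features, feature_ix, ...):
#     ...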
def select_dataset(dataset_ix):
    print(dataset_ix)
    if dataset_ix is not None:
        dataset_name = options_datasets[dataset_ix]['label']
        params_dataset = params['datasets'][dataset_name]

        # Get dataset class
        dataset_class = get_available_datasets()[dataset_name]

        # Init data_generator
        dataset = dataset_class(params_dataset['dataset_path'])
        options_folds = [{
            'label': name,
            'value': value
        } for value, name in enumerate(dataset.fold_list)]
        return [
            params_dataset['dataset_path'],
            # params_dataset['audio_folder'],
            # params_dataset['feature_folder'],
            '',
            '',
            options_folds
        ]
    else:
        return [""] * 4
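# Note: the four outputs mirror the return list above: the dataset path
# text box, two cleared folder fields (audio and features, judging by the
# commented-out lines), and the fold dropdown options.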
def main():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k')
    parser.add_argument(
        '-f', '--features', type=str,
        help='features name (e.g. Spectrogram, MelSpectrogram, Openl3)',
        default='MelSpectrogram')
    parser.add_argument(
        '-p', '--path', type=str,
        help='path to the parameters.json file',
        default='../')
    parser.add_argument(
        '-m', '--model', type=str,
        help='model name (e.g. MLP, SB_CNN, SB_CNN_SED, A_CRNN, VGGish)',
        default='SB_CNN')
    parser.add_argument(
        '-fold', '--fold_name', type=str,
        help='fold name',
        default='fold1')
    parser.add_argument(
        '-s', '--models_path', type=str,
        help='path to save the trained model',
        default='../trained_models')
    parser.add_argument('--aug', dest='augmentation', action='store_true')
    parser.add_argument('--no-aug', dest='augmentation', action='store_false')
    parser.set_defaults(augmentation=False)
    args = parser.parse_args()

    print(__doc__)

    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')

    if args.features not in get_available_features():
        raise AttributeError('Features not available')

    if args.model not in get_available_models():
        raise AttributeError('Model not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]
    params_features = params['features']
    params_model = params['models'][args.model]

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path)

    if args.fold_name not in dataset.fold_list:
        raise AttributeError('Fold not available')

    # Data augmentation
    if args.augmentation:
        # Define the augmentations
        augmentations = params['data_augmentations']

        # Initialize AugmentedDataset
        dataset = AugmentedDataset(
            dataset, params['features']['sr'], augmentations)

        # Process all files
        print('Doing data augmentation ...')
        dataset.process()
        print('Done!')

    # Get and init feature class
    features_class = get_available_features()[args.features]
    features = features_class(
        sequence_time=params_features['sequence_time'],
        sequence_hop_time=params_features['sequence_hop_time'],
        audio_win=params_features['audio_win'],
        audio_hop=params_features['audio_hop'],
        sr=params_features['sr'],
        **params_features[args.features])

    print('Features shape: ', features.get_shape())

    # Check if features were extracted
    if not features.check_if_extracted(dataset):
        print('Extracting features ...')
        features.extract(dataset)
        print('Done!')

    use_validate_set = True
    if args.dataset in ['TUTSoundEvents2017', 'ESC50', 'ESC10']:
        # When the dataset is small, don't use a validation set.
        use_validate_set = False

    folds_train, folds_val, _ = evaluation_setup(
        args.fold_name, dataset.fold_list,
        params_dataset['evaluation_mode'],
        use_validate_set=use_validate_set)

    data_gen_train = DataGenerator(
        dataset, features, folds=folds_train,
        batch_size=params['train']['batch_size'],
        shuffle=True, train=True, scaler=None)

    scaler = Scaler(normalizer=params_model['normalizer'])
    print('Fitting scaler ...')
    scaler.fit(data_gen_train)
    print('Done!')

    # Pass the scaler to data_gen_train so it is applied when loading data
    data_gen_train.set_scaler(scaler)

    data_gen_val = DataGenerator(
        dataset, features, folds=folds_val,
        batch_size=params['train']['batch_size'],
        shuffle=False, train=False, scaler=scaler)

    # Define model
    features_shape = features.get_shape()
    n_frames_cnn = features_shape[1]
    n_freq_cnn = features_shape[2]
    n_classes = len(dataset.label_list)

    model_class = get_available_models()[args.model]

    metrics = ['classification']
    if args.dataset in sed_datasets:
        metrics = ['sed']
    if args.dataset in tagging_datasets:
        metrics = ['tagging']

    model_container = model_class(
        model=None, model_path=None,
        n_classes=n_classes,
        n_frames_cnn=n_frames_cnn,
        n_freq_cnn=n_freq_cnn,
        metrics=metrics,
        **params_model['model_arguments'])

    model_container.model.summary()

    # Set paths
    model_folder = os.path.join(args.models_path, args.model, args.dataset)
    exp_folder = os.path.join(model_folder, args.fold_name)
    mkdir_if_not_exists(exp_folder, parents=True)

    # Save model json and scaler
    model_container.save_model_json(model_folder)
    save_pickle(scaler, os.path.join(exp_folder, 'scaler.pickle'))

    # data_train = data_gen_train.get_data()
    # data_val = data_gen_val.get_data()

    # Train model
    model_container.train(
        data_gen_train, data_gen_val,
        # data_train, data_val,
        label_list=dataset.label_list,
        weights_path=exp_folder,
        **params['train'],
        sequence_time_sec=params_features['sequence_hop_time'])
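# Example invocation (a sketch; the script filename is an assumption). Train
# SB_CNN on UrbanSound8k fold1 with data augmentation enabled:
#
#   python train_model.py -d UrbanSound8k -f MelSpectrogram -m SB_CNN \
#       -fold fold1 -s ../trained_models --aug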
def main():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-od', '--origin_dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k')
    parser.add_argument(
        '-ofold', '--origin_fold_name', type=str,
        help='origin fold name',
        default='fold1')
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='ESC50')
    parser.add_argument(
        '-fold', '--fold_name', type=str,
        help='destination fold name',
        default='fold1')
    parser.add_argument(
        '-f', '--features', type=str,
        help='features name (e.g. Spectrogram, MelSpectrogram, Openl3)',
        default='MelSpectrogram')
    parser.add_argument(
        '-p', '--path', type=str,
        help='path to the parameters.json file',
        default='../')
    parser.add_argument(
        '-m', '--model', type=str,
        help='model name (e.g. MLP, SB_CNN, SB_CNN_SED, A_CRNN, VGGish)',
        default='SB_CNN')
    parser.add_argument(
        '-s', '--models_path', type=str,
        help='path to save the trained model',
        default='../trained_models')
    args = parser.parse_args()

    print(__doc__)

    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')

    if args.features not in get_available_features():
        raise AttributeError('Features not available')

    if args.model not in get_available_models():
        raise AttributeError('Model not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]
    params_features = params['features']
    params_model = params['models'][args.model]

    # Load origin model
    model_path_origin = os.path.join(
        args.models_path, args.model, args.origin_dataset)
    model_class = get_available_models()[args.model]

    metrics = ['accuracy']
    if args.dataset in sed_datasets:
        metrics = ['sed']

    model_container = model_class(
        model=None, model_path=model_path_origin, metrics=metrics)
    model_container.load_model_weights(
        os.path.join(model_path_origin, args.origin_fold_name))

    kwargs = {}
    if args.dataset in sed_datasets:
        kwargs = {'sequence_hop_time': params_features['sequence_hop_time']}

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path, **kwargs)

    if args.fold_name not in dataset.fold_list:
        raise AttributeError('Fold not available')

    # Get and init feature class
    features_class = get_available_features()[args.features]
    features = features_class(
        sequence_time=params_features['sequence_time'],
        sequence_hop_time=params_features['sequence_hop_time'],
        audio_win=params_features['audio_win'],
        audio_hop=params_features['audio_hop'],
        sr=params_features['sr'],
        **params_features[args.features])

    print('Features shape: ', features.get_shape())

    # Check if features were extracted
    if not features.check_if_extracted(dataset):
        print('Extracting features ...')
        features.extract(dataset)
        print('Done!')

    use_validate_set = True
    if args.dataset in ['TUTSoundEvents2017', 'ESC50', 'ESC10']:
        # When the dataset is small, don't use a validation set.
        use_validate_set = False

    folds_train, folds_val, _ = evaluation_setup(
        args.fold_name, dataset.fold_list,
        params_dataset['evaluation_mode'],
        use_validate_set=use_validate_set)

    data_gen_train = DataGenerator(
        dataset, features, folds=folds_train,
        batch_size=params['train']['batch_size'],
        shuffle=True, train=True, scaler=None)

    scaler = Scaler(normalizer=params_model['normalizer'])
    print('Fitting scaler ...')
    scaler.fit(data_gen_train)
    print('Done!')

    data_gen_train.set_scaler(scaler)

    data_gen_val = DataGenerator(
        dataset, features, folds=folds_val,
        batch_size=params['train']['batch_size'],
        shuffle=False, train=False, scaler=scaler)

    # Fine-tune model
    n_classes = len(dataset.label_list)
    layer_where_to_cut = -2
    model_container.fine_tuning(
        layer_where_to_cut,
        new_number_of_classes=n_classes,
        new_activation='sigmoid',
        freeze_source_model=True)

    model_container.model.summary()

    # Set paths
    model_folder = os.path.join(
        args.models_path, args.model,
        args.origin_dataset + '_ft_' + args.dataset)
    exp_folder = os.path.join(model_folder, args.fold_name)
    mkdir_if_not_exists(exp_folder, parents=True)

    # Save model json and scaler
    model_container.save_model_json(model_folder)
    save_pickle(scaler, os.path.join(exp_folder, 'scaler.pickle'))

    # Train model
    model_container.train(
        data_gen_train, data_gen_val,
        label_list=dataset.label_list,
        weights_path=exp_folder,
        sequence_time_sec=params_features['sequence_hop_time'],
        **params['train'])
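# Example invocation (a sketch; the script filename is an assumption).
# Fine-tune an SB_CNN trained on UrbanSound8k to ESC50:
#
#   python fine_tuning.py -od UrbanSound8k -ofold fold1 -d ESC50 \
#       -fold fold1 -m SB_CNN -f MelSpectrogram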
def main():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k')
    parser.add_argument(
        '-f', '--features', type=str,
        help='features name (e.g. Spectrogram, MelSpectrogram, Openl3)',
        default='MelSpectrogram')
    parser.add_argument(
        '-p', '--path', type=str,
        help='path to the parameters.json file',
        default='../')
    args = parser.parse_args()

    print(__doc__)

    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')

    if args.features not in get_available_features():
        raise AttributeError('Features not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_dataset = params['datasets'][args.dataset]
    params_features = params['features']

    # Get and init dataset class
    dataset_class = get_available_datasets()[args.dataset]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path)

    # Get and init feature class
    features_class = get_available_features()[args.features]
    features = features_class(
        sequence_time=params_features['sequence_time'],
        sequence_hop_time=params_features['sequence_hop_time'],
        audio_win=params_features['audio_win'],
        audio_hop=params_features['audio_hop'],
        sr=params_features['sr'],
        **params_features[args.features])

    # Extract features
    if features.check_if_extracted(dataset):
        print('%s features were already extracted for %s dataset.'
              % (args.features, args.dataset))
    else:
        print('Extracting features ...')
        features.extract(dataset)
        print('Done!')
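# Example invocation (a sketch; the script filename is an assumption):
#
#   python extract_features.py -d URBAN_SED -f MelSpectrogram -p ../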
def main():
    # Parse arguments
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-d', '--dataset', type=str,
        help='dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED, SONYC_UST)',
        default='UrbanSound8k')
    parser.add_argument(
        '-f', '--features', type=str,
        help='features name (e.g. Spectrogram, MelSpectrogram, Openl3)',
        default='MelSpectrogram')
    parser.add_argument(
        '-p', '--path', type=str,
        help='path to the parameters.json file',
        default='../')
    parser.add_argument(
        '-m', '--model', type=str,
        help='model name (e.g. MLP, SB_CNN, SB_CNN_SED, A_CRNN, VGGish)',
        default='SB_CNN')
    parser.add_argument(
        '-fold', '--fold_name', type=str,
        help='fold name',
        default='fold1')
    parser.add_argument(
        '-s', '--models_path', type=str,
        help='path to load the trained model',
        default='../trained_models')
    parser.add_argument(
        '-ft', '--fine_tuning', type=str,
        help='fine-tuned dataset name (e.g. UrbanSound8k, ESC50, URBAN_SED)')
    args = parser.parse_args()

    print(__doc__)

    if args.dataset not in get_available_datasets():
        raise AttributeError('Dataset not available')

    if args.features not in get_available_features():
        raise AttributeError('Features not available')

    if args.model not in get_available_models():
        raise AttributeError('Model not available')

    # Get parameters
    parameters_file = os.path.join(args.path, 'parameters.json')
    params = load_json(parameters_file)
    params_features = params['features']

    dataset_name = (args.dataset
                    if args.fine_tuning is None
                    else args.fine_tuning)
    params_dataset = params['datasets'][dataset_name]

    # Get and init dataset class
    dataset_class = get_available_datasets()[dataset_name]
    dataset_path = os.path.join(args.path, params_dataset['dataset_path'])
    dataset = dataset_class(dataset_path)

    if args.fold_name not in dataset.fold_list:
        raise AttributeError('Fold not available')

    # Get and init feature class
    features_class = get_available_features()[args.features]
    features = features_class(
        sequence_time=params_features['sequence_time'],
        sequence_hop_time=params_features['sequence_hop_time'],
        audio_win=params_features['audio_win'],
        audio_hop=params_features['audio_hop'],
        sr=params_features['sr'],
        **params_features[args.features])

    # Check if features were extracted
    if not features.check_if_extracted(dataset):
        print('Extracting features ...')
        features.extract(dataset)
        print('Done!')

    # Set paths
    if args.fine_tuning is None:
        dataset_path = args.dataset
    else:
        dataset_path = args.dataset + '_ft_' + args.fine_tuning
    model_folder = os.path.join(args.models_path, args.model, dataset_path)
    exp_folder = os.path.join(model_folder, args.fold_name)

    # Load scaler
    scaler_file = os.path.join(exp_folder, 'scaler.pickle')
    scaler = load_pickle(scaler_file)

    # Init data generator
    data_gen_test = DataGenerator(
        dataset, features, folds=[args.fold_name],
        batch_size=params['train']['batch_size'],
        shuffle=False, train=False, scaler=scaler)

    # Load model and best weights
    model_class = get_available_models()[args.model]

    metrics = ['classification']
    if dataset_name in sed_datasets:
        metrics = ['sed']
    if args.dataset in tagging_datasets:
        metrics = ['tagging']

    model_container = model_class(
        model=None, model_path=model_folder, metrics=metrics)
    model_container.load_model_weights(exp_folder)

    kwargs = {}
    if dataset_name in sed_datasets:
        kwargs = {
            'sequence_time_sec': params_features['sequence_hop_time'],
            'metric_resolution_sec': 1.0
        }
    results = model_container.evaluate(
        data_gen_test, label_list=dataset.label_list, **kwargs)

    print(results[metrics[0]])
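# Example invocations (sketches; the script filename is an assumption).
# Evaluate the fold1 model, or a fine-tuned variant via -ft:
#
#   python evaluate_model.py -d UrbanSound8k -f MelSpectrogram -m SB_CNN \
#       -fold fold1
#   python evaluate_model.py -d UrbanSound8k -ft ESC50 -m SB_CNN -fold fold1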
color="primary", className="mr-1", disabled=False) # Feedback message of feature extraction msg_features = dbc.Alert( "Messages about feature extractor", id="msg_features", is_open=False, duration=4000, ) # Dataset parameters # Dataset selector datasets_classes = get_available_datasets() options_datasets = [{ 'label': name, 'value': value } for value, name in enumerate(datasets_classes.keys())] dataset_selector = dbc.FormGroup( [ dbc.Label("Dataset", html_for="dropdown", width=2), dbc.Col(dcc.Dropdown(id="dataset_name", options=options_datasets), width=10), ], row=True, ) # Dataset path input dataset_path_input = dbc.FormGroup(
def create_model(n_clicks_create_model, n_clicks_load_model, model_ix,
                 feature_ix, dataset_ix, model_parameters, sequence_time,
                 sequence_hop_time, audio_hop, audio_win, sr,
                 specific_parameters, dataset_path, audio_folder,
                 features_folder, model_path):
    global model_container
    global feature_extractor
    global dataset

    ctx = dash.callback_context

    if (n_clicks_create_model is None) and (n_clicks_load_model is None):
        return [False, "", 'success', '']

    button_id = ctx.triggered[0]['prop_id'].split('.')[0]
    if button_id in ('create_model', 'load_model'):
        if model_ix is None:
            return [True, 'Please select a Model', 'danger', '']
        if feature_ix is None:
            return [True, 'Please select a Feature extractor', 'danger', '']
        if dataset_ix is None:
            return [True, 'Please select a Dataset', 'danger', '']

        model_name = options_models[model_ix]['label']
        feature_name = options_features[feature_ix]['label']
        dataset_name = options_datasets[dataset_ix]['label']

        feature_extractor_class = get_available_features()[feature_name]
        specific_parameters = ast.literal_eval(specific_parameters)
        feature_extractor = feature_extractor_class(
            sequence_time=sequence_time,
            sequence_hop_time=sequence_hop_time,
            audio_win=audio_win,
            audio_hop=audio_hop,
            sr=sr,
            **specific_parameters)

        features_shape = feature_extractor.get_shape()
        n_frames_cnn = features_shape[1]
        n_freq_cnn = features_shape[2]

        # Get dataset class
        dataset_class = get_available_datasets()[dataset_name]

        # Init data_generator
        kwargs = {}
        if dataset_name == 'URBAN_SED':
            kwargs = {'sequence_hop_time': sequence_hop_time}
        dataset = dataset_class(dataset_path, **kwargs)

        n_classes = len(dataset.label_list)

        model_class = get_available_models()[model_name]
        model_parameters = ast.literal_eval(model_parameters)

        if button_id == 'create_model':
            with graph.as_default():
                model_container = model_class(
                    model=None, model_path=None,
                    n_classes=n_classes,
                    n_frames_cnn=n_frames_cnn,
                    n_freq_cnn=n_freq_cnn,
                    **model_parameters)
                model_container.model.summary()

                if model_name == 'VGGish':
                    model_container.load_pretrained_model_weights()
                    model_container.fine_tuning(
                        -1, new_number_of_classes=n_classes,
                        new_activation='softmax',
                        freeze_source_model=True)

                stringlist = []
                model_container.model.summary(
                    print_fn=lambda x: stringlist.append(x))
                summary = "\n".join(stringlist)

            mkdir_if_not_exists(conv_path(os.path.dirname(model_path)))
            mkdir_if_not_exists(conv_path(model_path))
            model_container.save_model_json(conv_path(model_path))
            return [True, 'Model created', 'success', summary]

        if button_id == 'load_model':
            with graph.as_default():
                model_container = model_class(
                    model=None, model_path=conv_path(model_path))
                model_container.model.summary()
                stringlist = []
                model_container.model.summary(
                    print_fn=lambda x: stringlist.append(x))
                summary = "\n".join(stringlist)
            return [True, 'Model loaded', 'success', summary]

    # model_container.save_model_weights(model_path)
    return [False, "", 'success', '']
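# The four outputs presumably drive a dbc.Alert (is_open, children, color)
# plus a component that displays the Keras model summary; this mapping is an
# assumption based on the return lists above and the msg_features pattern.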
def check_pipeline(feature_ix, sequence_time, sequence_hop_time, audio_hop,
                   audio_win, sr, specific_parameters, dataset_path,
                   audio_folder, features_folder, dataset_ix,
                   end_features_extraction, status_features,
                   model_parameters, model_path, model_ix):
    global model_container
    global feature_extractor
    global data_generator_train

    ctx = dash.callback_context
    button_id = ctx.triggered[0]['prop_id'].split('.')[0]

    # If triggered by end_features_extraction and the features
    # were already calculated, do nothing.
    if (button_id == 'end_features_extraction' and
            status_features == 'NOT_EXTRACTING'):
        raise dash.exceptions.PreventUpdate

    feature_extractor = None
    if feature_ix is not None:
        feature_name = options_features[feature_ix]['label']
        feature_extractor_class = get_available_features()[feature_name]
        specific_parameters = ast.literal_eval(specific_parameters)
        feature_extractor = feature_extractor_class(
            sequence_time=sequence_time,
            sequence_hop_time=sequence_hop_time,
            audio_win=audio_win,
            audio_hop=audio_hop,
            sr=sr,
            **specific_parameters)

    checks = []
    if dataset_ix is not None:
        dataset_name = options_datasets[dataset_ix]['label']

        # Get dataset class
        dataset_class = get_available_datasets()[dataset_name]
        dataset = dataset_class(dataset_path)

        if dataset.check_if_downloaded():
            checks.append('dataset')

        if feature_ix is not None:
            features_extracted = feature_extractor.check_if_extracted(dataset)
            if features_extracted:
                checks.append('features')

            if model_ix is not None:
                model_name = options_models[model_ix]['label']
                features_shape = feature_extractor.get_shape()
                n_frames_cnn = features_shape[1]
                n_freq_cnn = features_shape[2]
                n_classes = len(dataset.label_list)
                model_class = get_available_models()[model_name]
                model_parameters = ast.literal_eval(model_parameters)
                with graph.as_default():
                    model_container = model_class(
                        model=None, model_path=None,
                        n_classes=n_classes,
                        n_frames_cnn=n_frames_cnn,
                        n_freq_cnn=n_freq_cnn,
                        **model_parameters)
                    if model_name == 'VGGish':
                        model_container.load_pretrained_model_weights()
                        model_container.fine_tuning(
                            -1, new_number_of_classes=n_classes,
                            new_activation='softmax',
                            freeze_source_model=True)
                    model_exists = model_container.check_if_model_exists(
                        conv_path(model_path))
                    if model_exists:
                        checks.append('model')

    return [checks]
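# The single output wraps the 'dataset' / 'features' / 'model' checks in a
# list, which suggests it feeds the value of a checklist-style pipeline
# status component in the layout; that target component is an assumption.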