def load_validation_data(setting_path, user_settings):
    """Load the validation split and attach per-clip file paths.

    ``feature.yml`` and ``train.yml`` are merged (keys from ``train.yml`` win
    on conflict), ``store`` is overridden with ``user_settings['store']`` and
    the result is handed to ``load_training_data``.  Two columns are then
    added to the validation frame:

    * ``npy`` -- ``<store>/<label>/preprocess/<path ending in npy>``
    * ``mp3`` -- ``<store>/<label>/clips/<path ending in mp3>``

    Args:
        setting_path: Location passed on to ``common.load_settings``.
        user_settings: Mapping that must contain the key ``'store'``.

    Returns:
        The validation DataFrame with the ``npy`` and ``mp3`` columns added.
    """
    settings_feat = common.load_settings(setting_path, 'feature.yml')
    settings_train = common.load_settings(setting_path, 'train.yml')
    settings = dict(settings_feat, **settings_train)
    settings['store'] = user_settings['store']
    store = settings['store']

    # Only the validation split is needed here.
    _, val_df = load_training_data(settings)

    def clip_path(label, path, subdir, extension):
        # Swap only a trailing 'npz'; a plain str.replace would also rewrite
        # an 'npz' that happens to occur earlier in the file name.
        if path.endswith('npz'):
            path = path[:-len('npz')] + extension
        return os.path.join(store, label, subdir, path)

    # Built as plain lists instead of a row-wise ``apply`` so that an empty
    # validation frame simply gets two empty columns.
    labels_and_paths = list(zip(val_df['label'], val_df['path']))
    val_df['npy'] = [
        clip_path(label, path, 'preprocess', 'npy')
        for label, path in labels_and_paths
    ]
    val_df['mp3'] = [
        clip_path(label, path, 'clips', 'mp3')
        for label, path in labels_and_paths
    ]

    print(val_df.shape)
    print('Columns', val_df.columns)
    return val_df
def __init__(self, settings_path, user_settings):
    """Assemble the pieces used for prediction.

    Built in this order: the audio preprocessor, the audio featurizer, the
    merged settings, the Keras model and finally the sample loader.  Every
    settings dict is passed through ``self._apply_user_settings`` together
    with ``user_settings`` before it is used.
    """
    # 1. Preprocessor, configured from preprocess.yml.
    preprocess_conf = common.load_settings(
        settings_path, default_conf_name='preprocess.yml')
    self.preprocessor = preprocessing.AudioPreprocessor(
        self._apply_user_settings(preprocess_conf, user_settings))

    # 2. Featurizer, configured from feature.yml.
    feature_conf = common.load_settings(
        settings_path, default_conf_name='feature.yml')
    self.featurizer = featurization.AudioFeature(
        self._apply_user_settings(feature_conf, user_settings))

    # 3. Merged settings: train.yml, then the model config that train.yml
    #    names under 'model_conf', then feature.yml (later sources win).
    train_conf = common.load_settings(
        settings_path, default_conf_name='train.yml')
    model_conf = common.load_settings(
        settings_path, default_conf_name=train_conf['model_conf'])
    merged = dict(train_conf, **model_conf)
    merged.update(feature_conf)
    self.settings = self._apply_user_settings(merged, user_settings)

    # 4. Model.  ``best_model_fname`` is not set in this method --
    #    presumably it is defined elsewhere on the class.
    self.model = keras.models.load_model(self.best_model_fname)

    # 5. Loader: reads 'frames' and 'normalize' from self.settings at call
    #    time, so later changes to the settings are picked up.
    def load_window(mels, start_time=None):
        return self.featurizer.load_sample(
            mels,
            window_frames=self.settings['frames'],
            start_time=start_time,
            normalize=self.settings['normalize'],
        )

    self.loader = load_window
def main():
    """Train a model from the command line.

    Steps: configure Keras, parse the arguments, load the train / feature /
    model settings, load the training and validation frames, load an existing
    model (when ``args.load_name`` is set) or build a new one, then hand
    everything to ``train_model``.

    Raises:
        ValueError: If the training set is not more than 4.5 times as large
            as the validation set.
    """
    setup_keras()
    args = parse()

    train_settings = common.load_settings(
        args.settings_path, default_conf_name='train.yml')
    train_settings['store'] = args.store
    feature_settings = common.load_settings(
        args.settings_path, default_conf_name='feature.yml')
    model_settings = common.load_settings(
        args.settings_path, default_conf_name=train_settings['model_conf'])

    train_df, val_df = load_training_data(dict(train_settings, **feature_settings))

    # Explicit check instead of ``assert`` so the sanity test still runs when
    # Python is started with -O (which strips assert statements).
    if train_df.shape[0] <= val_df.shape[0] * 4.5:
        raise ValueError(
            f'training data {train_df.shape[0]} should be much larger than validation {val_df.shape[0]}'
        )

    sample_featurizer = AudioFeature(feature_settings)

    if args.load_name:
        model_name = args.load_name
        print('Loading existing model', model_name)
        m = keras.models.load_model(model_name)
    else:
        # New models are named after the architecture, hop length and the
        # current local time (minute resolution).
        t = datetime.datetime.now().strftime('%Y%m%d-%H%M')
        model_name = f"model-{model_settings['model']}_hop{feature_settings['hop_length']}_{t}"
        m = models.build(dict(model_settings, **feature_settings))
    m.summary()

    output_dir = os.path.join(args.model_store, model_name)
    print(f"Training model: '{model_name}'", json.dumps(train_settings, indent=1))

    # On key conflicts later sources win: train < model < feature.
    combined_settings = dict(train_settings, **model_settings, **feature_settings)
    train_model(
        output_dir,
        train_df,
        val_df,
        model=m,
        sample_featurizer=sample_featurizer,
        settings=combined_settings,
    )
def main():
    """Command-line entry point for the preprocessing step.

    Parses the arguments, loads ``preprocess.yml``, copies the ``store``,
    ``lang`` and ``force`` arguments into the settings, builds the metadata
    DataFrame with ``prepare_meta_df`` and runs ``preprocess_dataframe`` over
    it through ``common.parallelize_dataframe`` using ``args.jobs`` cores.
    """
    cli = parse()
    conf = common.load_settings(cli.settings_path, default_conf_name='preprocess.yml')

    # Command-line values override whatever the YAML file provided.
    for option in ('store', 'lang', 'force'):
        conf[option] = getattr(cli, option)

    meta_df = prepare_meta_df(conf)
    common.parallelize_dataframe(meta_df, preprocess_dataframe, conf, n_cores=cli.jobs)