def import_eeglab_sets(filepaths, target_path):
    # try to load metadata-db
    metadb_file = os.path.join(target_path, 'metadata_db.pklz')
    if os.path.exists(metadb_file) and os.path.isfile(metadb_file):
        metadb = load(metadb_file)
        log.info('metadb loaded from {}'.format(metadb_file))
    else:
        metadb = {}  # empty DB
        log.info('no metadb found at {}. using empty db'.format(metadb_file))

    for filepath in filepaths:
        # load extra data
        filename = os.path.basename(filepath)
        data, metadata = load_eeglab_data(filepath)

        # save data
        savepath = generate_filepath_from_metadata(metadata)
        save(os.path.join(target_path, savepath), (data, metadata), mkdirs=True)

        # save metadata
        metadb[savepath] = metadata
        save(metadb_file, metadb, mkdirs=True)

        log.debug('imported as {}'.format(savepath))
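# Hedged usage sketch (added for illustration, not part of the original module):
# import_eeglab_sets() takes an iterable of EEGLAB .set file paths plus a target
# directory, writes one (data, metadata) pickle per file and keeps a metadata_db.pklz
# index up to date. Both directories below are placeholders.
def _example_import_eeglab(source_dir='/path/to/eeglab/sets',
                           target_dir='/path/to/imported'):
    set_files = sorted(glob.glob(os.path.join(source_dir, '*.set')))
    import_eeglab_sets(set_files, target_dir)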
def extract_output(experiment_root):
    train, model = load_results(experiment_root)

    # get the datasets with their names from the monitor
    for key, dataset in train.algorithm.monitoring_dataset.items():
        # process each dataset
        with log_timing(log, 'processing dataset \'{}\''.format(key)):
            y_real, y_pred, output = process_dataset(model, dataset)
            save(os.path.join(experiment_root, 'cache', key + '_output.pklz'),
                 (y_real, y_pred, output))
def split_trial(path, trial_len):
    log.info('processing {}'.format(path))

    datafile = glob.glob(os.path.join(path, '*.txt'))[0]
    metafile = glob.glob(os.path.join(path, '*_Trials_Onsets.xlsx'))[0]
    log.debug('data file: {}'.format(datafile))
    log.debug('meta file: {}'.format(metafile))

    onsets = load_xlsx_meta_file(metafile)
    data = load_data_file(datafile)
    log.debug(onsets)

    onsets.append([len(data), 'end'])  # artificial last marker

    trials = {}
    for i in xrange(len(onsets) - 1):
        onset, label = onsets[i]
        next_onset = onsets[i + 1][0]

        # rounding to integers
        onset = int(math.floor(float(onset)))
        next_onset = int(math.floor(float(next_onset)))
        next_onset = min(onset + trial_len, next_onset)

        log.debug('[{}..{}) -> {}'.format(onset, next_onset, label))

        trial_data = np.vstack(data[onset:next_onset])
        log.debug('{} samples extracted'.format(trial_data.shape))

        trials[label] = trial_data

    filename = os.path.join(path, 'trials.pklz')
    with log_timing(log, 'saving to {}'.format(filename)):
        save(filename, trials)

    return trials
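# Hedged usage sketch (illustrative, not from the original file): split_trial() expects
# a subject directory containing one *.txt data file and one *_Trials_Onsets.xlsx onset
# file; trial_len is given in samples. The 400 Hz rate and 32 s + 4 s trial length mirror
# the constants used elsewhere in this code, but the path below is a placeholder.
def _example_split_trial(subject_dir='/path/to/Sub001'):
    sample_rate = 400                        # Hz
    trial_len = (32 + 4) * sample_rate       # samples per trial, incl. 4 s tail
    return split_trial(subject_dir, trial_len)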
def save(filepath, data):
    return fs_util.save(filepath, data)
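# Hedged round-trip sketch (illustrative): save() above is a thin wrapper around
# deepthought.util.fs_util.save, and the matching load() helper used elsewhere in this
# codebase is assumed to read the file back. The filename is a placeholder; .pklz is the
# extension used throughout these modules.
def _example_save_load_roundtrip(tmp_path='/tmp/example.pklz'):
    payload = {'answer': 42}
    save(tmp_path, payload)
    restored = load(tmp_path)
    assert restored == payload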
def import_dataset(source_path, target_path):
    # config = load_config(default_config='../train_sda.cfg');
    # DATA_ROOT = source_path
    # DATA_ROOT = config.eeg.get('dataset_root', './')

    SAMPLE_RATE = 400   # in Hz
    TRIAL_LENGTH = 32   # in sec
    TRIAL_LENGTH += 4   # add 4s after end of presentation
    TRIAL_SAMPLE_LENGTH = SAMPLE_RATE * TRIAL_LENGTH

    log.info('using dataset at {}'.format(source_path))

    '''
    Note from Dan:
    All subjects should have channels 15, 16, 17 and 18 removed [...]
    If you want to make them truly identical, you could remove channel 19 from
    the subjects with more channels, although this should be 'good' data.
    '''
    bad_channels = {}
    bad_channels[1] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[2] = [7, 8, 15, 16, 17, 18, 20, 21]
    bad_channels[3] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[4] = [7, 8, 15, 16, 17, 18, 20, 21]
    bad_channels[5] = [7, 8, 15, 16, 17, 18, 20, 21]
    bad_channels[6] = [7, 8, 9, 12, 15, 16, 17, 18]
    bad_channels[7] = [5, 6, 12, 15, 16, 17, 18, 20]
    bad_channels[8] = [7, 8, 15, 16, 17, 18, 20, 21]
    bad_channels[9] = [5, 6, 12, 15, 16, 17, 18, 20]
    bad_channels[10] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[11] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[12] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[13] = [5, 6, 12, 15, 16, 17, 18, 20]

    label_converter = LabelConverter()

    metadb_file = os.path.join(target_path, 'metadata_db.pklz')
    metadb = {}  # empty DB

    with log_timing(log, 'generating datasets'):
        for subject_id in xrange(1, 14):
            search_path = os.path.join(source_path, 'Sub{0:03d}*'.format(subject_id))
            sourcefile_path = glob.glob(search_path)

            if sourcefile_path is None or len(sourcefile_path) == 0:
                log.warn('nothing found at {}'.format(search_path))
                continue
            else:
                sourcefile_path = sourcefile_path[0]

            trials = split_session(sourcefile_path, TRIAL_SAMPLE_LENGTH)

            for stimulus, trial_data in trials.iteritems():
                stimulus_id = label_converter.get_stimulus_id(stimulus)
                log.debug('processing {} with {} samples and stimulus_id {}'.format(
                    stimulus, trial_data.shape, stimulus_id))

                channels = trial_data.transpose()
                trial_data = []
                channel_ids = []
                for i, channel in enumerate(channels):
                    channel_id = i + 1

                    # filter bad channels
                    if channel_id in bad_channels[subject_id]:
                        log.debug('skipping bad channel {}'.format(channel_id))
                        continue

                    # convert to float32
                    channel = np.asfarray(channel, dtype='float32')
                    trial_data.append(channel)
                    channel_ids.append(channel_id)

                trial_data = np.vstack(trial_data).transpose()  # format: (samples, channels)
                log.debug('extracted {} from channels: {}'.format(trial_data.shape, channel_ids))

                label = label_converter.get_label(stimulus_id, 'rhythm')  # raw label, unsorted
                label = label_converter.shuffle_classes[label]            # sorted label id

                metadata = {
                    'subject': subject_id,
                    'label': label,
                    'meta_label': label_converter.get_label(stimulus_id, 'rhythm_meta'),
                    'stimulus': stimulus,
                    'stimulus_id': stimulus_id,
                    'rhythm_type': label_converter.get_label(stimulus_id, 'rhythm'),
                    'tempo': label_converter.get_label(stimulus_id, 'tempo'),
                    'audio_file': label_converter.get_label(stimulus_id, 'audio_file'),
                    'trial_no': 1,
                    'trial_type': 'perception',
                    'condition': 'n/a',
                    'channels': channel_ids,
                }

                # save data
                savepath = generate_filepath_from_metadata(metadata)
                save(os.path.join(target_path, savepath), (trial_data, metadata), mkdirs=True)

                # save metadata
                metadb[savepath] = metadata

                log.debug('imported {}={} as {}'.format(label, metadata['meta_label'], savepath))

        save(metadb_file, metadb, mkdirs=True)

    log.info('import finished')
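# Hedged usage sketch (illustrative; both paths are placeholders): import_dataset()
# walks the Sub001..Sub013 directories below source_path, splits each session into
# stimulus-aligned trials, drops the per-subject bad channels, and writes one pickle
# per trial plus a metadata_db.pklz index into target_path.
def _example_import_dataset():
    import_dataset('/path/to/raw/rhythm/data', '/path/to/imported/dataset')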
def preprocess(config):
    # config = load_config(default_config='../train_sda.cfg');

    DATA_ROOT = config.eeg.get('dataset_root', './')
    SAMPLE_RATE = 400   # in Hz
    TRIAL_LENGTH = 32   # in sec
    TRIAL_LENGTH += 4   # add 4s after end of presentation
    TRIAL_SAMPLE_LENGTH = SAMPLE_RATE * TRIAL_LENGTH

    log.info('using dataset at {}'.format(DATA_ROOT))

    '''
    Note from Dan:
    All subjects should have channels 15, 16, 17 and 18 removed [...]
    If you want to make them truly identical, you could remove channel 19 from
    the subjects with more channels, although this should be 'good' data.
    '''
    bad_channels = {}
    bad_channels[1] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[2] = [7, 8, 15, 16, 17, 18, 20, 21]
    bad_channels[3] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[4] = [7, 8, 15, 16, 17, 18, 20, 21]
    bad_channels[5] = [7, 8, 15, 16, 17, 18, 20, 21]
    bad_channels[6] = [7, 8, 9, 12, 15, 16, 17, 18]
    bad_channels[7] = [5, 6, 12, 15, 16, 17, 18, 20]
    bad_channels[8] = [7, 8, 15, 16, 17, 18, 20, 21]
    bad_channels[9] = [5, 6, 12, 15, 16, 17, 18, 20]
    bad_channels[10] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[11] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[12] = [5, 6, 15, 16, 17, 18, 20, 21]
    bad_channels[13] = [5, 6, 12, 15, 16, 17, 18, 20]

    with log_timing(log, 'generating datasets'):
        for subject_id in xrange(1, 14):
            search_path = os.path.join(DATA_ROOT, 'Sub{0:03d}*'.format(subject_id))
            path = glob.glob(search_path)

            if path is None or len(path) == 0:
                log.warn('nothing found at {}'.format(search_path))
                continue
            else:
                path = path[0]

            trials_filename = os.path.join(path, 'trials.pklz')
            trials = None
            if not os.path.isfile(trials_filename):
                log.debug('{} not found. running split_trial()'.format(trials_filename))
                trials = split_trial(path, TRIAL_SAMPLE_LENGTH)
            else:
                with log_timing(log, 'loading data from {}'.format(trials_filename)):
                    trials = load(trials_filename)
            assert trials

            dataset_filename = os.path.join(path, 'dataset_13goodchannels_plus4s.pklz')
            dataset = generate_cases(subject_id, trials, bad_channels[subject_id])  # = data, labels
            with log_timing(log, 'saving dataset to {}'.format(dataset_filename)):
                save(dataset_filename, dataset)
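# Hedged usage sketch (illustrative): preprocess() only needs a config object whose
# config.eeg mapping provides 'dataset_root'. The commented-out load_config() call at
# the top of preprocess() suggests one way to obtain such an object; the config path
# below is a placeholder and load_config() is assumed to be importable here.
def _example_preprocess():
    config = load_config(default_config='../train_sda.cfg')
    preprocess(config)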
def run(self, classifiers=(), verbose=False, debug=False):
    print 'running job #{}'.format(self.job_id)

    import deepthought.util.fs_util as fs_util
    fs_util.ensure_dir_exists(self.output_path)
    print 'output path: ', self.output_path

    # prepare result objects
    results = {k: ClassificationResult(k) for (k, _) in classifiers}

    # load full dataset with all sources only once!
    from deepthought.datasets.hdf5 import get_dataset
    self.full_hdf5, self.full_meta = get_dataset(self.hdf5name,
                                                 selectors=self.base_selectors,
                                                 sources=None)

    self.initialize()

    # main loop ###

    # outer cross-validation
    outer_folds = self.fold_generator.get_outer_cv_folds()
    for ofi, ofold in enumerate(outer_folds):
        print 'processing outer fold', ofold

        # phase I : pre-train features ###
        encoder_fn = self.pretrain_encoder(ofi, ofold)  # FIXME: add params

        # phase II : classify ###
        train_selectors = self.fold_generator.get_fold_selectors(outer_fold=ofold['train'])
        X_train, Y_train, meta_train = self.get_encoded_dataset(encoder_fn, train_selectors)

        test_selectors = self.fold_generator.get_fold_selectors(outer_fold=ofold['valid'])
        X_test, Y_test, _ = self.get_encoded_dataset(encoder_fn, test_selectors)

        for (classifier_name, classifier_factory) in classifiers:
            result = results[classifier_name]

            model_prefix = os.path.join(self.output_path,
                                        '{}_fold_{}'.format(classifier_name, ofi))

            # generate index folds
            idx_folds = []
            from deepthought.datasets.selection import DatasetMetaDB
            for ifold in self.fold_generator.get_inner_cv_folds(ofold):
                train_selectors = self.fold_generator.get_fold_selectors(
                    outer_fold=ofold['train'], inner_fold=ifold['train'])
                metadb = DatasetMetaDB(meta_train, train_selectors.keys())

                if 'valid' in ifold.keys():
                    valid_selectors = self.fold_generator.get_fold_selectors(
                        outer_fold=ofold['train'], inner_fold=ifold['valid'])
                else:
                    valid_selectors = None

                if debug:
                    print 'train_selectors:', train_selectors
                    print 'valid_selectors:', valid_selectors

                # get selected trial IDs
                train_idx = metadb.select(train_selectors)
                if valid_selectors is not None:
                    valid_idx = metadb.select(valid_selectors)
                else:
                    valid_idx = []

                idx_folds.append((train_idx, valid_idx))

            if debug:
                print idx_folds  # print the generated folds before running the classifier

            # train classifier
            classifier, predict_fn = classifier_factory.train(
                X_train, Y_train, idx_folds, self.hyper_params, model_prefix)

            # test classifier
            train_Y_pred = predict_fn(X_train)
            test_Y_pred = predict_fn(X_test)

            # append to result
            result.append_train(Y_train, train_Y_pred)
            result.append_test(Y_test, test_Y_pred)

            # result.fold_scores.append(classifier.score(X_test, Y_test))
            result.fold_scores.append(np.mean(Y_test == test_Y_pred))

            if verbose:
                print '{} results for fold {}'.format(classifier_name, ofold)
                print classification_report(Y_test, test_Y_pred)
                print confusion_matrix(Y_test, test_Y_pred)
                print 'overall test accuracy so far:', 1 - result.test_error()

    print 'all folds completed'

    for (classifier_name, _) in classifiers:
        result = results[classifier_name]

        fs_util.save(os.path.join(self.output_path,
                                  '{}_result.pklz'.format(classifier_name)), result)

        # result
        print
        print 'SUMMARY for classifier', classifier_name
        print
        print 'fold scores: ', np.asarray(result.fold_scores)
        print
        print classification_report(result.test_Y_real, result.test_Y_pred)
        print confusion_matrix(result.test_Y_real, result.test_Y_pred)
        print
        print 'train accuracy:', 1 - result.train_error()
        print 'test accuracy :', 1 - result.test_error()

    return [results[classifier[0]].test_error() for classifier in classifiers]  # error for each classifier
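# Hedged sketch of the classifier-factory interface that run() assumes (illustrative,
# not part of the original code): each entry in `classifiers` is a (name, factory)
# pair, and factory.train(...) must return (classifier, predict_fn). The wrapper below
# around sklearn's LogisticRegression ignores the inner-fold indices, hyper-parameters
# and model_prefix; the class name is hypothetical.
class _ExampleSklearnFactory(object):
    def train(self, X_train, Y_train, idx_folds, hyper_params, model_prefix):
        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression()
        clf.fit(X_train, Y_train)
        return clf, clf.predict  # (classifier, predict_fn)

# possible call site (job is a hypothetical instance of the class defining run()):
# job.run(classifiers=[('logreg', _ExampleSklearnFactory())], verbose=True)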
def pretrain_encoder(self, outer_fold_index, outer_fold):
    """
    generic template that works with any model structure
    :param outer_fold_index:
    :param outer_fold:
    :return:
    """
    import deepthought.util.fs_util as fs_util
    from deepthought.util.function_util import get_function

    fold_params_filename = os.path.join(self.output_path,
                                        'fold_params_{}.pklz'.format(outer_fold_index))

    inner_folds = self.fold_generator.get_inner_cv_folds(outer_fold)

    if os.path.isfile(fold_params_filename):
        # load trained network parameters from existing file
        fold_param_values = fs_util.load(fold_params_filename)
        print 'loaded trained fold network parameters from', fold_params_filename
        # assert len(fold_param_values) == len(inner_folds)
    else:
        # compute trial fold models
        fold_param_values = []
        fold_errors = []
        for ifi, ifold in enumerate(inner_folds):
            log.info('processing fold {}.{}: {}'.format(outer_fold_index, ifi, ifold))

            train_selectors = self.fold_generator.get_fold_selectors(
                outer_fold=outer_fold['train'], inner_fold=ifold['train'],
                base_selectors=self.base_selectors)

            if 'valid' in ifold.keys():
                valid_selectors = self.fold_generator.get_fold_selectors(
                    outer_fold=outer_fold['train'], inner_fold=ifold['valid'],
                    base_selectors=self.base_selectors)
            else:
                valid_selectors = None

            self.pretrain_model.set_parameter_values(self.init_param_values)  # reset weights

            trained_model_param_values, best_error_valid = self.pretrain(
                self.pretrain_model, self.hyper_params, self.full_hdf5, self.full_meta,
                train_selectors, valid_selectors)

            fold_param_values.append(trained_model_param_values)
            fold_errors.append(best_error_valid)

            if 'only_1_inner_fold' in self.hyper_params and self.hyper_params['only_1_inner_fold']:
                print 'Stop after 1 inner fold requested (only_1_inner_fold=True).'
                break

        fold_errors = np.asarray(fold_errors).squeeze()
        print 'fold errors:', fold_errors

        # store trained network parameters for later analysis
        fs_util.save(fold_params_filename, fold_param_values)
        print 'parameters saved to', fold_params_filename

    # build encoder
    encoder = self.encoder_pipeline_factory.set_pipeline_parameters(self.encoder_model,
                                                                    fold_param_values)

    # transform dataset (re-using data_dict and working with indices as input)
    encoder_fn = get_function(encoder, allow_input_downcast=True)

    return encoder_fn
def loadall(path):
    data = []
    labels = []
    files = librosa.util.find_files(path, ext='wav', recurse=True)
    # , case_sensitive=False, limit=None, offset=0)
    for filename in files:
        x, y = loadfile(filename,
                        auto_sample_rate=config.audio.autpsamplerate,
                        samplerate=config.audio.samplerate,
                        barsamples=config.audio.barsamples,
                        maxbars=config.audio.maxbars)
        data.append(x)
        labels.append(y)

    data = np.vstack(data)      # transform list to a big numpy array by stacking
    labels = np.vstack(labels)

    logging.info('loaded {0} values from {1} files in total'.format(data.shape, len(files)))
    # print labels
    return (data, labels)


if __name__ == '__main__':
    # logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.DEBUG);
    # global config;
    config = Config(file('deepbeat.cfg'))
    logging.basicConfig(format=config.logger.pattern, level=logging.DEBUG)

    dataset = loadall(config.audio.path)
    # split = splitdata(dataset, ptest=config.audio.ptest, pvalid=config.audio.pvalid);
    # save(config.audio.datasetpath, split);
    save(config.audio.datasetpath, dataset)
    # load();