def interim(model_name, setname):
    """ Convert GAN image samples to tfrecord """
    _dir_results = os.path.join('models', model_name, 'results', setname, 'Samples')
    _dir_interim = os.path.join('data/interim', model_name, setname)

    utils.checkfolder(_dir_interim)

    # List filenames and classes; also divides filenames into equally sized shards.
    filenames, class_names = _get_filenames_and_classes(_dir_results)

    # Save the class dictionary.
    class_dict = dict(zip(class_names, range(len(class_names))))
    utils.save_dict(class_dict, _dir_interim, 'class_dict.json')

    # Convert images to tfrecords based on the list of filenames.
    for shard_n in range(_NUM_SHARDS):
        utils.show_message('Processing shard %d/%d' % (shard_n + 1, _NUM_SHARDS))

        tf_filename = _get_output_filename(_dir_interim, shard_n)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            _convert_to_tfrecord(filenames[shard_n], class_dict, tfrecord_writer)

    print('\nFinished converting GAN samples to tfrecord for %s %s!' % (model_name, setname))
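# _convert_to_tfrecord is called above but not defined in this file. A minimal sketch of what
# it might look like, assuming images are stored one class per folder and each example carries
# the encoded image bytes plus an integer label (the feature keys are assumptions, not the
# original implementation):
def _convert_to_tfrecord(filenames, class_dict, tfrecord_writer):
    for filename in filenames:
        # Read the raw encoded image bytes (TF1-style file I/O, matching tf.python_io above).
        with tf.gfile.GFile(filename, 'rb') as f:
            encoded_image = f.read()
        # The class name is assumed to be the parent folder of the image.
        class_name = os.path.basename(os.path.dirname(filename))
        label = class_dict[class_name]
        example = tf.train.Example(features=tf.train.Features(feature={
            'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_image])),
            'image/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        }))
        tfrecord_writer.write(example.SerializeToString())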
def save(self, path):
    """Save the per-channel mean/stdv statistics (in microvolts) to disk as a dict."""
    info = dict()
    m = self.mean
    s = self.stdv
    for i, ch_name in enumerate(self.ch_names):
        info[ch_name] = {
            'mean_microvolt': float(m[i][0]),
            'stdv_microvolt': float(s[i][0])
        }
    save_dict(info, path)
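# save_dict is used by save() above and by prepare() below but is not defined in this file.
# A minimal sketch, assuming it pickles the dict to the given path (the '.p' extension used
# in prepare() suggests pickle, but both the format and this body are assumptions):
import pickle

def save_dict(d, path):
    # Serialize the dictionary with pickle.
    with open(path, 'wb') as f:
        pickle.dump(d, f)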
def prepare(self):
    train_files, train_labels = self._get_all_sorted_file_names_and_labels(train=True)
    assert len(train_files) == len(train_labels) and len(train_files) != 0
    test_files, test_labels = self._get_all_sorted_file_names_and_labels(train=False)
    assert len(test_files) == len(test_labels) and len(test_files) != 0

    # Find out normalization statistics:
    preprocessing_functions = self.default_preprocessing_functions()

    ch_names = DataGenerator.wanted_electrodes['EEG']
    if self.use_ekg:
        ch_names = ch_names + DataGenerator.wanted_electrodes['EKG']

    for split_type, split_files in zip(['train', 'test'], [train_files, test_files]):
        output_data_dir = os.path.join(self.cache_path, split_type, 'data')
        output_info_dir = os.path.join(self.cache_path, split_type, 'info')
        os.makedirs(output_data_dir, exist_ok=True)
        os.makedirs(output_info_dir, exist_ok=True)

        # Could be parallelized in the future.
        for i, file in enumerate(split_files):
            # Some recordings appear to label the EKG channel 'EKG1' rather than 'EKG';
            # retry with the alternative name if loading fails.
            try:
                sensor_types = ('EEG', 'EKG1') if self.use_ekg else ('EEG',)
                data, info_dict = self._load_file(file, preprocessing_functions, sensor_types)
            except RuntimeError:
                sensor_types = ('EEG', 'EKG') if self.use_ekg else ('EEG',)
                data, info_dict = self._load_file(file, preprocessing_functions, sensor_types)

            # Find normalization statistics for the data.
            mean = np.mean(data, dtype=np.float32)
            std = np.std(data, dtype=np.float32)
            info_dict['mean'] = float(mean)
            info_dict['std'] = float(std)

            name = '%s_%s_Age_%s_Gender_%s' % (str(info_dict['recording_date']),
                                               str(info_dict['sequence_name']),
                                               str(info_dict['age']),
                                               info_dict['gender'])
            output_file_path = os.path.join(output_data_dir, name + '_raw.fif')
            output_info_path = os.path.join(output_info_dir, name + '.p')

            info = mne.create_info(ch_names, sfreq=self.sampling_freq)
            fif_array = mne.io.RawArray(data, info)
            fif_array.save(output_file_path)
            save_dict(info_dict, output_info_path)
            print('Split Type: %s, Progress: %g' % (split_type, (i + 1) / len(split_files)))
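# A hypothetical consumer of the cache written by prepare(): load one recording and its info
# dict, then normalize the signal with the stored statistics. The pickle read-back and the
# function itself are assumptions based on the naming scheme above, not original code.
import pickle

import mne

def load_normalized(data_path, info_path):
    raw = mne.io.read_raw_fif(data_path, preload=True)
    with open(info_path, 'rb') as f:
        info_dict = pickle.load(f)
    # Zero-mean, unit-variance normalization using the cached statistics.
    return (raw.get_data() - info_dict['mean']) / info_dict['std']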
def process(dataset_part):
    """Runs the conversion operation.

    Args:
        dataset_part: The dataset part to be converted [Nonsegmented, Segmented].
    """
    if dataset_part == 'Nonsegmented':
        _dir_raw = _DIR_RAW_NONSEGMENTED
        _dir_processed = _DIR_PROCESSED_NONSEGMENTED
        setname = 'Nonsegmented'
    else:
        _dir_raw = _DIR_RAW_SEGMENTED
        _dir_processed = _DIR_PROCESSED_SEGMENTED
        setname = 'Segmented'

    if _EXCLUDED_GRASSES:
        exclude_list = ['Black-grass', 'Common wheat', 'Loose Silky-bent']
    else:
        exclude_list = []

    # Extract the raw data.
    data_filename = os.path.join(_dir_raw)
    archive = zipfile.ZipFile(data_filename)
    archive.extractall(_dir_processed)

    # List filenames and classes; also divides filenames into equally sized shards.
    filenames, class_names = _get_filenames_and_classes(_dir_processed, [setname], exclude_list)

    # Save the class dictionary.
    class_dict = dict(zip(class_names, range(len(class_names))))
    utils.save_dict(class_dict, _dir_processed, 'class_dict.json')

    # Convert images to tfrecords based on the list of filenames.
    for shard_n in range(_NUM_SHARDS):
        utils.show_message('Processing shard %d/%d' % (shard_n + 1, _NUM_SHARDS))

        tf_filename = _get_output_filename(_dir_processed, shard_n)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            _convert_to_tfrecord(filenames[shard_n], class_dict, tfrecord_writer)

    # Clean up the extracted images.
    tmp_dir = os.path.join(_dir_processed, setname)
    tf.gfile.DeleteRecursively(tmp_dir)

    print('\nFinished converting the PSD %s dataset!' % setname)
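# _get_output_filename is shared by interim() and process() but not defined in this file.
# A minimal sketch, assuming one .tfrecord file per shard; the naming pattern is an
# assumption, not the original implementation:
def _get_output_filename(output_dir, shard_n):
    return os.path.join(output_dir, 'data_shard_%d-of-%d.tfrecord' % (shard_n + 1, _NUM_SHARDS))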
validation_stories, _, _ = read_babi(path_babi_base, to_read_val, args.babi_tasks, only_relevant=args.only_relevant)
validation_stories = vectorize_babi(validation_stories, dictionary, args.batch_size_stories, device)

test_stories, _, _ = read_babi(path_babi_base, to_read_test, args.babi_tasks, only_relevant=args.only_relevant)
test_stories = vectorize_babi(test_stories, dictionary, args.batch_size_stories, device)

# Persist the dictionary on a fresh run; reuse the previously saved one when loading a model.
if not args.load:
    save_dict(dictionary)
else:
    dictionary = load_dict()

dict_size = len(dictionary)
print("Dictionary size: ", dict_size)
print("Done reading babi!")

lstm = LSTM(args.hidden_dim_lstm, args.batch_size_stories, dict_size, args.emb_dim,
            args.lstm_layers, device).to(device)

rn = RelationNetwork(args.hidden_dim_lstm, args.hidden_dims_g, args.output_dim_g,
                     args.hidden_dims_f, dict_size, args.batch_size_stories, device).to(device)

if args.load:
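# save_dict and load_dict are called without a path above, so they presumably agree on a
# fixed location. A minimal sketch, assuming a pickled file at 'dictionary.p' (both the
# filename and the pickle format are assumptions):
import pickle

def save_dict(d, path='dictionary.p'):
    with open(path, 'wb') as f:
        pickle.dump(d, f)

def load_dict(path='dictionary.p'):
    with open(path, 'rb') as f:
        return pickle.load(f)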
def process(dataset_part):
    """Runs the download and conversion operation.

    Args:
        dataset_part: The dataset part to be converted [Nonsegmented, Segmented].
    """
    if dataset_part == 'Nonsegmented':
        _dir_raw = _DIR_RAW_NONSEGMENTED
        _dir_processed = _DIR_PROCESSED_NONSEGMENTED
        setname = 'Nonsegmented'
    else:
        _dir_raw = _DIR_RAW_SEGMENTED
        _dir_processed = _DIR_PROCESSED_SEGMENTED
        setname = 'Segmented'

    if _EXCLUDED_GRASSES:
        exclude_list = ['Black-grass', 'Common wheat', 'Loose Silky-bent']
    else:
        exclude_list = []

    # Extract the raw archive and process the training data.
    data_filename = os.path.join(_dir_raw)
    archive = zipfile.ZipFile(data_filename)
    archive.extractall(_dir_processed)

    filenames, class_names = _get_filenames_and_classes(_dir_processed, [setname], exclude_list)
    class_dict = dict(zip(class_names, range(len(class_names))))
    utils.save_dict(class_dict, _dir_processed, 'class_dict.json')

    for shard_n in range(_NUM_SHARDS):
        utils.show_message('Processing shard %d/%d' % (shard_n + 1, _NUM_SHARDS))
        tf_filename = _get_output_filename(_dir_processed, shard_n)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            _convert_to_tfrecord(filenames[shard_n], class_dict, tfrecord_writer)

    # Clean up the extracted images.
    tmp_dir = os.path.join(_dir_processed, setname)
    tf.gfile.DeleteRecursively(tmp_dir)

    print('\nFinished converting the PSD %s dataset!' % setname)
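# A hypothetical entry point for the conversion functions above, converting both dataset
# parts in one run (the __main__ guard is an assumption; the original driver is not shown):
if __name__ == '__main__':
    for part in ('Nonsegmented', 'Segmented'):
        process(part)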