import datetime
import time

import numpy as np
from tsfresh import extract_features
from tsfresh.feature_extraction import ComprehensiveFCParameters
from tsfresh.utilities.dataframe_functions import impute

# Project-local names (Dataset, DatasetDivider, general_keys, get_data_as_df,
# BATCH_SIZE) are assumed to be imported elsewhere in this module.


def get_tsfresh(data):
    # Labels are a placeholder here; only the extracted features are used.
    dataset = Dataset(data_array=data, data_label=data,
                      batch_size=BATCH_SIZE)
    # Alternatives: EfficientFCParameters(), MinimalFCParameters()
    extraction_settings = ComprehensiveFCParameters()
    features_to_return = []
    start_time = time.time()
    eval_not_finished = 1
    while eval_not_finished != 0:
        data_batch, _ = dataset.get_batch_eval()
        batch_df = get_data_as_df(data_batch)
        X = extract_features(batch_df, column_id='ids', column_sort='time',
                             default_fc_parameters=extraction_settings,
                             impute_function=impute, n_jobs=10)
        impute(X)
        features_batch = X.values
        features_to_return.append(features_batch)
        # BATCH_COUNTER_EVAL is assumed to wrap back to 0 after a full pass
        # over the data, which terminates the loop.
        eval_not_finished = dataset.BATCH_COUNTER_EVAL
        if dataset.BATCH_COUNTER_EVAL % 100 == 0:
            time_usage = str(datetime.timedelta(
                seconds=int(round(time.time() - start_time))))
            print("it %i Time usage: %s" % (dataset.BATCH_COUNTER_EVAL,
                                            time_usage), flush=True)
    features_to_return = np.concatenate(features_to_return)
    time_usage = str(datetime.timedelta(
        seconds=int(round(time.time() - start_time))))
    print("Total Time usage: %s\n" % time_usage, flush=True)
    return features_to_return
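# Minimal usage sketch for get_tsfresh (illustrative helper, not part of the
# original module): it assumes the raw data is a numpy array of per-sample
# time series compatible with Dataset and get_data_as_df. The input shape
# and variable names here are hypothetical.
def _example_get_tsfresh_usage():
    fake_series = np.random.rand(20, 100)  # 20 hypothetical series, 100 steps
    features = get_tsfresh(fake_series)
    print('extracted feature matrix shape:', features.shape)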
def merge_datasets_dict(datasets_dict1, datasets_dict2):
    # Concatenate the two dicts split by split (train/val/test).
    merged_datasets_dict = {}
    for set_name in datasets_dict1.keys():  # renamed to avoid shadowing set()
        data_array = np.concatenate([datasets_dict1[set_name].data_array,
                                     datasets_dict2[set_name].data_array])
        data_label = np.concatenate([datasets_dict1[set_name].data_label,
                                     datasets_dict2[set_name].data_label])
        merged_datasets_dict[set_name] = Dataset(data_array, data_label,
                                                 batch_size=50)
    return merged_datasets_dict
def generated_images_to_dataset(gen_imgs, label=1):
    # Assign every generated image the same class `label`.
    dataset = Dataset(data_array=gen_imgs,
                      data_label=np.ones(gen_imgs.shape[0]) * label,
                      batch_size=50)
    data_splitter = DatasetDivider(test_size=0.12, validation_size=0.08)
    data_splitter.set_dataset_obj(dataset)
    train_dataset, test_dataset, val_dataset = \
        data_splitter.get_train_test_val_set_objs()
    datasets_dict = {
        general_keys.TRAIN: train_dataset,
        general_keys.VALIDATION: val_dataset,
        general_keys.TEST: test_dataset
    }
    return datasets_dict
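# Hedged sketch of how the two helpers above compose: turn generated images
# into train/val/test Dataset objects, then merge them with an existing
# real-data dict. `real_datasets_dict` and `gen_imgs` are hypothetical
# placeholders.
def _example_merge_generated(real_datasets_dict, gen_imgs):
    generated_dict = generated_images_to_dataset(gen_imgs, label=1)
    combined_dict = merge_datasets_dict(real_datasets_dict, generated_dict)
    return combined_dict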
def clean_misshaped(self, dataset: Dataset):
    samples_clone = list(dataset.data_array[:])
    labels_clone = list(dataset.data_label[:])
    metadata_clone = list(dataset.meta_data[:])
    miss_shaped_sample_idx = self._get_misshaped_samples_idx(samples_clone)
    # Pop in reverse order so earlier indices stay valid while deleting.
    for index in sorted(miss_shaped_sample_idx, reverse=True):
        samples_clone.pop(index)
        labels_clone.pop(index)
        metadata_clone.pop(index)
    if self.verbose:
        print('%i misshaped samples removed\n%s' % (
            len(miss_shaped_sample_idx),
            self._get_string_label_count(labels_clone)), flush=True)
    dataset = Dataset(data_array=samples_clone, data_label=labels_clone,
                      meta_data=metadata_clone, batch_size=dataset.batch_size)
    return dataset
def clean_nans(self, dataset: Dataset):
    samples_clone = list(dataset.data_array[:])
    labels_clone = list(dataset.data_label[:])
    metadata_clone = list(dataset.meta_data[:])
    nans_sample_idx = self._get_nans_samples_idx(samples_clone)
    self._check_nan_all_removed(samples_clone, nans_sample_idx)
    # Pop in reverse order so earlier indices stay valid while deleting.
    for index in sorted(nans_sample_idx, reverse=True):
        samples_clone.pop(index)
        labels_clone.pop(index)
        metadata_clone.pop(index)
    if self.verbose:
        print('%i samples with NaNs removed\n%s' % (
            len(nans_sample_idx),
            self._get_string_label_count(labels_clone)), flush=True)
    dataset = Dataset(data_array=samples_clone, data_label=labels_clone,
                      batch_size=dataset.batch_size, meta_data=metadata_clone)
    return dataset
def _dict_to_dataset(self, data_dict):
    # Wrap a {IMAGES, LABELS} dict into a Dataset object.
    dataset = Dataset(data_array=data_dict[general_keys.IMAGES],
                      data_label=data_dict[general_keys.LABELS],
                      batch_size=self.batch_size)
    return dataset
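# Illustrative cleaning pipeline (assumed usage, given that clean_misshaped
# and clean_nans both take and return a Dataset): drop misshaped samples
# first so the NaN check only sees arrays of uniform shape. `cleaner` stands
# for an instance of the class these methods belong to.
def _example_clean_pipeline(cleaner, dataset):
    dataset = cleaner.clean_misshaped(dataset)
    dataset = cleaner.clean_nans(dataset)
    return dataset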