Example #1
import datetime
import time

import numpy as np
from tsfresh import extract_features
from tsfresh.feature_extraction import ComprehensiveFCParameters
from tsfresh.utilities.dataframe_functions import impute


def get_tsfresh(data):
    # Wrap the raw array in the project's Dataset class so it can be
    # consumed batch by batch (keyword names follow the other examples).
    dataset = Dataset(data_array=data, data_label=data, batch_size=BATCH_SIZE)
    # Alternatives: EfficientFCParameters() or MinimalFCParameters()
    extraction_settings = ComprehensiveFCParameters()
    features_to_return = []
    start_time = time.time()
    eval_not_finished = 1
    while eval_not_finished != 0:
        # Fetch the next evaluation batch and convert it to the tidy
        # DataFrame layout that tsfresh expects.
        data_batch, _ = dataset.get_batch_eval()
        batch_df = get_data_as_df(data_batch)
        X = extract_features(batch_df,
                             column_id='ids',
                             column_sort='time',
                             default_fc_parameters=extraction_settings,
                             impute_function=impute,
                             n_jobs=10)
        impute(X)  # in-place imputation of any remaining NaN/inf values
        features_batch = X.values
        features_to_return.append(features_batch)
        eval_not_finished = dataset.BATCH_COUNTER_EVAL
        if dataset.BATCH_COUNTER_EVAL % 100 == 0:
            time_usage = str(
                datetime.timedelta(seconds=int(round(time.time() -
                                                     start_time))))
            print("it %i Time usage: %s" %
                  (dataset.BATCH_COUNTER_EVAL, time_usage),
                  flush=True)
    features_to_return = np.concatenate(features_to_return)
    time_usage = str(
        datetime.timedelta(seconds=int(round(time.time() - start_time))))
    print("Total Time usage: %s\n" % (str(time_usage)), flush=True)
    return features_to_return
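
A minimal usage sketch for get_tsfresh, assuming the project-local Dataset class, the get_data_as_df helper, and the module-level BATCH_SIZE constant are importable; none of these ship with tsfresh, and the input shape below is a stand-in.

import numpy as np

# Hypothetical call site with dummy time-series data.
light_curves = np.random.rand(200, 100, 2)
features = get_tsfresh(light_curves)
print(features.shape)  # (n_samples, n_extracted_features)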
Example #2
import numpy as np


def merge_datasets_dict(datasets_dict1, datasets_dict2):
  merged_datasets_dict = {}
  # Merge each split (e.g. train/validation/test) by concatenating the
  # sample arrays and label arrays of the two sources.
  for set_name in datasets_dict1.keys():
    data_array = np.concatenate([datasets_dict1[set_name].data_array,
                                 datasets_dict2[set_name].data_array])
    data_label = np.concatenate([datasets_dict1[set_name].data_label,
                                 datasets_dict2[set_name].data_label])
    merged_datasets_dict[set_name] = Dataset(data_array, data_label,
                                             batch_size=50)
  return merged_datasets_dict
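
A hedged usage sketch, assuming both inputs are dictionaries keyed by the same split names (as built in Example #3) and holding Dataset objects; datasets_dict_a and datasets_dict_b are hypothetical names for two such dictionaries.

merged = merge_datasets_dict(datasets_dict_a, datasets_dict_b)
# Each split now holds the concatenation of both sources.
print(merged[general_keys.TRAIN].data_array.shape)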
Example #3
import numpy as np


def generated_images_to_dataset(gen_imgs, label=1):
  # Assign the same label to every generated image.
  dataset = Dataset(data_array=gen_imgs,
                    data_label=np.ones(gen_imgs.shape[0]) * label,
                    batch_size=50)
  # Split into train (80%), test (12%) and validation (8%) subsets.
  data_splitter = DatasetDivider(test_size=0.12, validation_size=0.08)
  data_splitter.set_dataset_obj(dataset)
  train_dataset, test_dataset, val_dataset = \
    data_splitter.get_train_test_val_set_objs()
  datasets_dict = {
    general_keys.TRAIN: train_dataset,
    general_keys.VALIDATION: val_dataset,
    general_keys.TEST: test_dataset
  }
  return datasets_dict
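
A usage sketch under the same assumptions (project-local Dataset, DatasetDivider and general_keys modules); the image shape is illustrative only.

import numpy as np

# Hypothetical generator output: 100 fake 21x21 single-channel images.
fake_imgs = np.random.rand(100, 21, 21, 1)
datasets_dict = generated_images_to_dataset(fake_imgs, label=1)
print(datasets_dict[general_keys.TRAIN].data_label[:5])  # all ones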
# The following methods belong to a dataset-cleaning helper class; the
# enclosing class definition is not shown on this page.
def clean_misshaped(self, dataset: Dataset):
    samples_clone = list(dataset.data_array[:])
    labels_clone = list(dataset.data_label[:])
    metadata_clone = list(dataset.meta_data[:])
    miss_shaped_sample_idx = self._get_misshaped_samples_idx(samples_clone)
    # Pop in reverse index order so earlier indices stay valid.
    for index in sorted(miss_shaped_sample_idx, reverse=True):
        samples_clone.pop(index)
        labels_clone.pop(index)
        metadata_clone.pop(index)
    if self.verbose:
        print('%i misshaped samples removed\n%s' %
              (len(miss_shaped_sample_idx),
               self._get_string_label_count(labels_clone)),
              flush=True)
    # Rebuild a fresh Dataset from the cleaned clones.
    dataset = Dataset(data_array=samples_clone,
                      data_label=labels_clone,
                      meta_data=metadata_clone,
                      batch_size=dataset.batch_size)
    return dataset
def clean_nans(self, dataset: Dataset):
    samples_clone = list(dataset.data_array[:])
    labels_clone = list(dataset.data_label[:])
    metadata_clone = list(dataset.meta_data[:])
    nans_sample_idx = self._get_nans_samples_idx(samples_clone)
    self._check_nan_all_removed(samples_clone, nans_sample_idx)
    # Pop in reverse index order so earlier indices stay valid.
    for index in sorted(nans_sample_idx, reverse=True):
        samples_clone.pop(index)
        labels_clone.pop(index)
        metadata_clone.pop(index)
    if self.verbose:
        print('%i samples with NaNs removed\n%s' %
              (len(nans_sample_idx),
               self._get_string_label_count(labels_clone)),
              flush=True)
    dataset = Dataset(data_array=samples_clone,
                      data_label=labels_clone,
                      batch_size=dataset.batch_size,
                      meta_data=metadata_clone)
    return dataset
def _dict_to_dataset(self, data_dict):
    # Wrap an images/labels dictionary into a Dataset object.
    dataset = Dataset(data_array=data_dict[general_keys.IMAGES],
                      data_label=data_dict[general_keys.LABELS],
                      batch_size=self.batch_size)
    return dataset
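
For reference, a minimal, hypothetical sketch of the Dataset constructor these examples rely on, reconstructed only from the attributes used above; the project's real class also provides batching helpers (get_batch_eval, BATCH_COUNTER_EVAL) that are not reconstructed here.

import numpy as np


class Dataset(object):
    # Assumed interface only; the real project class may differ.
    def __init__(self, data_array, data_label, batch_size=50, meta_data=None):
        self.data_array = np.asarray(data_array)
        self.data_label = np.asarray(data_label)
        self.batch_size = batch_size
        self.meta_data = meta_data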