def subset_time(self, by, t_from, t_to):
    """ Restricts the dataset to the time points inside a closed interval

    Args:
        by(String): name of the time descriptor whose values are
            compared against the interval bounds
        t_from: lower bound of the interval (inclusive)
        t_to: upper bound of the interval (inclusive)

    Returns:
        TemporalDataset, holding only the selected entries along the
        third measurement axis together with the matching
        time_descriptors; all other descriptors are passed on unchanged

    """
    time_values = self.time_descriptors[by]
    # unique descriptor values that fall within [t_from, t_to]
    in_window = [
        point for point in get_unique_unsorted(time_values)
        if point <= t_to and point >= t_from
    ]
    selection = bool_index(time_values, in_window)
    return TemporalDataset(
        measurements=self.measurements[:, :, selection],
        descriptors=self.descriptors,
        obs_descriptors=self.obs_descriptors,
        channel_descriptors=self.channel_descriptors,
        time_descriptors=subset_descriptor(self.time_descriptors, selection),
    )
def split_time(self, by):
    """ Breaks the dataset into one TemporalDataset per unique time value

    Args:
        by(String): name of the time descriptor used for the split

    Returns:
        list of TemporalDataset, one for every unique value of the
        selected time descriptor, in the order given by
        get_unique_unsorted

    """
    time_values = self.time_descriptors[by]

    def _slice_at(value):
        # entries along the third measurement axis that carry this value
        mask = (time_values == value)
        return TemporalDataset(
            measurements=self.measurements[:, :, mask],
            descriptors=self.descriptors,
            obs_descriptors=self.obs_descriptors,
            channel_descriptors=self.channel_descriptors,
            time_descriptors=subset_descriptor(self.time_descriptors, mask),
        )

    return [_slice_at(value) for value in get_unique_unsorted(time_values)]
def split_channel(self, by):
    """ Breaks the dataset into one TemporalDataset per unique channel value

    Args:
        by(String): name of the channel descriptor used for the split

    Returns:
        list of TemporalDataset, one for every unique value of the
        selected channel descriptor; each part records that value in
        its descriptors under the key ``by``

    """
    channel_values = self.channel_descriptors[by]
    parts = []
    for value in get_unique_unsorted(channel_values):
        mask = (channel_values == value)
        part_measurements = self.measurements[:, mask, :]
        # copy so that the shared descriptors of self are left untouched
        part_descriptors = self.descriptors.copy()
        part_descriptors[by] = value
        parts.append(TemporalDataset(
            measurements=part_measurements,
            descriptors=part_descriptors,
            obs_descriptors=self.obs_descriptors,
            channel_descriptors=subset_descriptor(
                self.channel_descriptors, mask),
            time_descriptors=self.time_descriptors,
        ))
    return parts
def convert_to_dataset(self, by):
    """ converts to Dataset long format.
        time dimension is absorbed into observation dimension

    The rows of the returned measurements are grouped by the unique
    values of the ``by`` time descriptor, in the order returned by
    get_unique_unsorted. Within one group, all observations of a time
    point are listed before the observations of the next time point.

    Args:
        by(String): the descriptor which indicates the time dimension in
            the time_descriptor

    Returns:
        Dataset, whose obs_descriptors contain the original
        obs_descriptors and the time_descriptors, both expanded to one
        entry per row of the measurements

    """
    time = get_unique_unsorted(self.time_descriptors[by])

    descriptors = self.descriptors
    channel_descriptors = self.channel_descriptors.copy()

    measurements = np.empty([0, self.n_channel])
    # one independent empty array per key (no shared mutable default)
    obs_descriptors = {key: np.array([]) for key in self.obs_descriptors}
    for key in self.time_descriptors:
        obs_descriptors[key] = np.array([])

    for v in time:
        selection = (self.time_descriptors[by] == v)
        selected = self.measurements[:, :, selection]
        n_selected = selected.shape[2]

        # Reshape explicitly instead of calling squeeze(): squeeze() also
        # drops a length-1 observation or channel axis and leaves the
        # time axis in place when several time points match, and in both
        # cases the concatenation below would fail.
        rows = np.moveaxis(selected, 2, 0).reshape(
            n_selected * self.n_obs, self.n_channel)
        measurements = np.concatenate((measurements, rows), axis=0)

        for key in self.obs_descriptors:
            # observation labels are repeated once per selected time point
            obs_descriptors[key] = np.concatenate(
                (obs_descriptors[key],
                 np.tile(self.obs_descriptors[key], n_selected)),
                axis=0)

        for key in self.time_descriptors:
            # every selected time label is held for all observations
            obs_descriptors[key] = np.concatenate(
                (obs_descriptors[key],
                 np.repeat(self.time_descriptors[key][selection],
                           self.n_obs)),
                axis=0)

    dataset = Dataset(measurements=measurements,
                      descriptors=descriptors,
                      obs_descriptors=obs_descriptors,
                      channel_descriptors=channel_descriptors)
    return dataset
def split_obs(self, by):
    """ Breaks the dataset into one Dataset per unique observation value

    Args:
        by(String): name of the observation descriptor used for the split

    Returns:
        list of Datasets, one for every unique value of the selected
        obs_descriptor, in the order given by get_unique_unsorted

    """
    obs_values = self.obs_descriptors[by]

    def _rows_with(value):
        # rows of the measurements that carry this descriptor value
        mask = (obs_values == value)
        return Dataset(
            measurements=self.measurements[mask, :],
            descriptors=self.descriptors,
            obs_descriptors=extract_dict(self.obs_descriptors, mask),
            channel_descriptors=self.channel_descriptors,
        )

    return [_rows_with(value) for value in get_unique_unsorted(obs_values)]
def test_get_unique_unsorted_ds(self):
    """ get_unique_unsorted on the 'conds' obs descriptor of self.data
    must yield 'cond_foo' followed by 'cond_bar'
    """
    expected = np.array(['cond_foo', 'cond_bar'])
    conds = self.data.obs_descriptors['conds']
    found = du.get_unique_unsorted(conds)
    assert np.all(expected == found)
def test_get_unique_unsorted_ints(self):
    """ get_unique_unsorted on the wrapped self.full_ints must match
    self.unique_ints
    """
    wrapped = [self.full_ints]
    self.array = np.array(wrapped)
    self.unique_unsorted = du.get_unique_unsorted(self.array)
    matches = (self.unique_unsorted == self.unique_ints)
    assert np.all(matches)
def test_get_unique_unsorted_str(self):
    """ get_unique_unsorted on the space-separated words of
    self.full_str must match the words of self.unique_str
    """
    words = self.full_str.split(' ')
    self.array = np.array([words])
    self.unique_unsorted = du.get_unique_unsorted(self.array)
    matches = (self.unique_unsorted == self.unique_str.split(' '))
    assert np.all(matches)