def process_y_dataset(self,
                      data: List[List[str]],
                      max_len: Optional[int] = None,
                      subset: Optional[List[int]] = None) -> np.ndarray:
    """Return the (optionally sub-selected) label data as a NumPy array.

    Args:
        data: Label samples to convert.
        max_len: Accepted as part of the signature; not used by this
            implementation.
        subset: Optional indices handed to ``utils.get_list_subset`` to pick
            samples out of ``data``. When ``None``, a shallow copy of the
            whole of ``data`` is used.

    Returns:
        ``np.array`` built from the selected samples.
    """
    if subset is None:
        # No selection requested: work on a shallow copy of the input.
        selected = data[:]
    else:
        selected = utils.get_list_subset(data, subset)
    return np.array(selected)
def process_y_dataset(self,
                      data: List[str],
                      max_len: Optional[int] = None,
                      subset: Optional[List[int]] = None) -> np.ndarray:
    """Encode classification labels for training or evaluation.

    Args:
        data: Label samples to encode.
        max_len: Accepted as part of the signature; not used by this
            implementation.
        subset: Optional indices handed to ``utils.get_list_subset`` to pick
            samples out of ``data``. When ``None``, ``data`` is used as-is.

    Returns:
        When ``self.multi_label`` is truthy, the result of
        ``self.multi_label_binarizer.fit_transform`` on the selected labels.
        Otherwise the labels are numerized with
        ``self.numerize_label_sequences`` and passed to ``to_categorical``
        together with ``len(self.label2idx)``.
    """
    labels = data if subset is None else utils.get_list_subset(data, subset)

    if self.multi_label:
        # NOTE(review): the binarizer is (re)fitted on every call —
        # confirm this is intended for non-training data.
        return self.multi_label_binarizer.fit_transform(labels)

    label_ids = self.numerize_label_sequences(labels)
    return to_categorical(label_ids, len(self.label2idx))
def process_x_dataset(self,
                      data: List[List[str]],
                      max_len: Optional[int] = None,
                      subset: Optional[List[int]] = None) -> np.ndarray:
    """Numerize token sequences and pad them to a common length.

    Args:
        data: Token sequences to process.
        max_len: Length handed to ``pad_sequences``. When ``None``,
            ``self.sequence_length`` is used instead.
        subset: Optional indices handed to ``utils.get_list_subset`` to pick
            samples out of ``data``. When ``None``, ``data`` is used as-is.

    Returns:
        The result of ``pad_sequences`` applied to the numerized sequences
        with ``padding='post'`` and ``truncating='post'``.
    """
    target_len = self.sequence_length if max_len is None else max_len
    tokens = data if subset is None else utils.get_list_subset(data, subset)
    token_ids = self.numerize_token_sequences(tokens)
    return pad_sequences(token_ids,
                         target_len,
                         padding='post',
                         truncating='post')
def process_y_dataset(self,
                      data: List[List[str]],
                      max_len: Optional[int] = None,
                      subset: Optional[List[int]] = None) -> np.ndarray:
    """Numerize, pad and categorically encode label sequences.

    Args:
        data: Label sequences to process.
        max_len: Length handed to ``pad_sequences`` unchanged.
        subset: Optional indices handed to ``utils.get_list_subset`` to pick
            samples out of ``data``. When ``None``, a shallow copy of the
            whole of ``data`` is used.

    Returns:
        The result of ``to_categorical`` applied to the padded label ids,
        with ``len(self.label2idx)`` as its second argument.
    """
    if subset is None:
        labels = data[:]
    else:
        labels = utils.get_list_subset(data, subset)

    label_ids = self.numerize_label_sequences(labels)
    # NOTE(review): a ``None`` max_len is forwarded as-is rather than being
    # replaced by a default length — confirm callers always pass the same
    # length used for the corresponding inputs.
    padded_ids = pad_sequences(label_ids,
                               max_len,
                               padding='post',
                               truncating='post')
    return to_categorical(padded_ids, len(self.label2idx))
def test_get_list_subset(self):
    """Check that ``get_list_subset`` picks the elements at the given indices.

    The source list holds 0..99, so selecting indices 10..19 is expected to
    yield the values 10..19.
    """
    source = list(range(0, 100))
    wanted_indices = list(range(10, 20))
    picked = get_list_subset(source, wanted_indices)
    assert picked == [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]