示例#1
0
    def __init__(
        self,
        positive_label: str,
        context_window: int,
        feature_extractor: FeatureExtractor,
        feature_summarizer: Callable[[List[Any]], torch.Tensor] = FeatureCollator.sum,
        linear_type: LinearType = LinearType.SVM_LINEAR,
        use_batch: bool = True,
        threshold: Optional[float] = 0.7,
        **kwargs,
    ):
        """Set up the window function state and build its linear classifier.

        Args:
            positive_label: Stored on the instance and forwarded to the
                parent constructor.
            context_window: Stored on the instance and forwarded to the
                parent constructor.
            feature_extractor: Stored on the instance and forwarded to the
                parent constructor.
            feature_summarizer: Callable kept as ``self.feature_summarizer``;
                annotated as mapping a list to a ``torch.Tensor``. Defaults
                to ``FeatureCollator.sum``.
            linear_type: Passed to ``construct_linear_classifier`` to build
                ``self.linear_model``. Defaults to ``LinearType.SVM_LINEAR``.
            use_batch: Forwarded to the parent constructor.
            threshold: Forwarded to the parent constructor.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        # Assigned before the parent constructor runs.
        # NOTE(review): presumably the parent __init__ may read these --
        # confirm before reordering.
        self.positive_label = positive_label
        self.feature_extractor = feature_extractor
        self.context_window = context_window
        # The parent takes (positive_label, feature_extractor, context_window)
        # positionally, which differs from this signature's parameter order.
        super(LinearWindowFunction, self).__init__(
            positive_label,
            feature_extractor,
            context_window,
            use_batch=use_batch,
            threshold=threshold,
            **kwargs,
        )

        # Fresh TensorList containers for the dictionary and its labels.
        self.dictionary = TensorList()
        self.labels = TensorList()
        self.feature_summarizer = feature_summarizer
        self.linear_model = construct_linear_classifier(linear_type=linear_type)
示例#2
0
def extract_features(
    data: AnnotatedDataType,
    dataset_id: int,
    shuffle: bool,
    feature_extractor: Callable[[AnnotationType], torch.Tensor],
):
    """Build training data from the positive and negative rows of ``data``.

    For every entry, features are computed with ``feature_extractor`` and
    indexed by the positive / negative indices that ``get_label_index``
    returns for the entry's ``'output'`` tags. Positive rows are labelled 1
    and negative rows 0 before being handed to ``construct_train_data``.

    Args:
        data: Iterable of entries; each entry is indexed with ``'output'``.
        dataset_id: Not used by this function.
        shuffle: Forwarded to ``construct_train_data``.
        feature_extractor: Callable applied to each entry to get its features.

    Returns:
        The ``(x_train, y_train)`` pair produced by ``construct_train_data``.
    """
    pos_features: TensorList = TensorList()
    neg_features: TensorList = TensorList()

    for entry in data:
        entry_tags: List[str] = entry['output']
        entry_features: torch.Tensor = feature_extractor(entry)
        pos_idx, neg_idx = get_label_index(entry_tags)
        pos_features.append(entry_features[pos_idx])
        neg_features.append(entry_features[neg_idx])

    pos_array: np.ndarray = pos_features.numpy()
    neg_array: np.ndarray = neg_features.numpy()

    # One label per row: 1.0 for positives, 0.0 for negatives.
    pos_targets: np.ndarray = np.ones(len(pos_array))
    neg_targets: np.ndarray = np.zeros(len(neg_array))

    train_x, train_y = construct_train_data(
        pos_data=pos_array,
        neg_data=neg_array,
        pos_labels=pos_targets,
        neg_labels=neg_targets,
        shuffle=shuffle,
    )
    return train_x, train_y
示例#3
0
    def __init__(
        self,
        positive_label: str,
        context_window: int,
        feature_extractor: FeatureExtractor,
        feature_summarizer: Callable[[List[Any]], torch.Tensor] = FeatureCollator.sum,
        use_batch: bool = True,
        threshold: Optional[float] = 0.7,
        parallelize: bool = False, # shared memory issue locally
        use_sparse: bool = False, # store dictionary as sparse matrix
        **kwargs,
    ):
        """Set up the window function state and its dictionary storage.

        Args:
            positive_label: Stored on the instance and forwarded to the
                parent constructor.
            context_window: Stored on the instance and forwarded to the
                parent constructor.
            feature_extractor: Stored on the instance and forwarded to the
                parent constructor.
            feature_summarizer: Callable kept as ``self.feature_summarizer``;
                annotated as mapping a list to a ``torch.Tensor``. Defaults
                to ``FeatureCollator.sum``.
            use_batch: Forwarded to the parent constructor.
            threshold: Forwarded to the parent constructor.
            parallelize: Stored as ``self.parallelize``; off by default.
            use_sparse: When true, ``self.dictionary`` is a
                ``SparseTensorList``; otherwise a ``TensorList``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        # Assigned before the parent constructor runs.
        # NOTE(review): presumably the parent __init__ may read these --
        # confirm before reordering.
        self.positive_label = positive_label
        self.feature_extractor = feature_extractor
        self.context_window = context_window
        self.parallelize = parallelize
        # The parent takes (positive_label, feature_extractor, context_window)
        # positionally, which differs from this signature's parameter order.
        super(BagWindowFunction, self).__init__(
            positive_label,
            feature_extractor,
            context_window,
            use_batch=use_batch,
            threshold=threshold,
            **kwargs,
        )

        # Only the dictionary switches container type; labels always use TensorList.
        self.dictionary = SparseTensorList() if use_sparse else TensorList()
        self.labels = TensorList()
        self.feature_summarizer = feature_summarizer
示例#4
0
 def test_constructor_numpy(self):
     """Three (1, dim) numpy rows give a TensorList of length 3 and shape (3, dim)."""
     numpy_rows = [np.zeros((1, TENSOR_EMBEDDING_DIM)) for _ in range(3)]
     tensor_list = TensorList(tensor_list=numpy_rows)
     assert len(tensor_list) == 3
     assert tensor_list.shape == (3, TENSOR_EMBEDDING_DIM)
示例#5
0
 def test_constructor_tensor(self):
     """Three (1, dim) torch rows give a TensorList of length 3 and shape (3, dim)."""
     torch_rows = [torch.zeros(1, TENSOR_EMBEDDING_DIM) for _ in range(3)]
     tensor_list = TensorList(tensor_list=torch_rows)
     assert len(tensor_list) == 3
     assert tensor_list.shape == (3, TENSOR_EMBEDDING_DIM)
示例#6
0
 def build_index(cls, sentence_embedder, dataset: UnlabeledBIODataset) -> TensorList:
     """Collect one embedding per dataset instance into a TensorList.

     Each instance's ``'id'`` and the dataset's ``dataset_id`` are wrapped
     in single-element tensors and passed to ``sentence_embedder`` as
     ``sentence_ids`` / ``dataset_ids``; the results are appended in
     iteration order.
     """
     embeddings = TensorList()
     for instance in dataset:
         sentence_id_tensor = torch.Tensor([instance['id']])
         dataset_id_tensor = torch.Tensor([dataset.dataset_id])
         embedded: torch.Tensor = sentence_embedder(
             sentence_ids=sentence_id_tensor,
             dataset_ids=dataset_id_tensor,
         )
         embeddings.append(embedded)
     return embeddings
示例#7
0
    def test_numpy(self):
        """``TensorList.numpy()`` returns an object whose type is ``np.ndarray``."""
        zero_rows = [torch.zeros(1, TENSOR_EMBEDDING_DIM) for _ in range(3)]
        tensor_list = TensorList(tensor_list=zero_rows)
        converted = tensor_list.numpy()
        assert type(converted) == np.ndarray
示例#8
0
    def test_tensor(self):
        """``TensorList.tensor()`` returns an object whose type is ``torch.Tensor``."""
        zero_rows = [torch.zeros(1, TENSOR_EMBEDDING_DIM) for _ in range(3)]
        tensor_list = TensorList(tensor_list=zero_rows)
        converted = tensor_list.tensor()
        assert type(converted) == torch.Tensor
示例#9
0
 def test_append(self):
     """Appending one (1, dim) numpy row grows length and leading shape from 3 to 4."""
     initial_rows = [torch.zeros(1, TENSOR_EMBEDDING_DIM) for _ in range(3)]
     tensor_list = TensorList(tensor_list=initial_rows)
     assert len(tensor_list) == 3
     assert tensor_list.shape == (3, TENSOR_EMBEDDING_DIM)

     extra_row = np.zeros((1, TENSOR_EMBEDDING_DIM))
     tensor_list.append(extra_row)
     assert len(tensor_list) == 4
     assert tensor_list.shape == (4, TENSOR_EMBEDDING_DIM)
示例#10
0
def construct_train_data(
    pos_data: np.ndarray,
    neg_data: np.ndarray,
    pos_labels: np.ndarray,
    neg_labels: np.ndarray,
    shuffle: Optional[bool] = False,
) -> Tuple[np.ndarray, np.ndarray]:
    """Combine positive and negative samples into one training set.

    Positive data/labels are appended before negative data/labels. When
    ``shuffle`` is truthy, one random permutation from ``torch.randperm``
    is applied to both data and labels so they stay aligned.

    Args:
        pos_data: Positive samples.
        neg_data: Negative samples.
        pos_labels: Labels for ``pos_data``.
        neg_labels: Labels for ``neg_data``.
        shuffle: Whether to permute the combined rows.

    Returns:
        ``(data, labels)`` as numpy arrays.
    """
    samples: TensorList = TensorList()
    targets: TensorList = TensorList()

    for chunk in (pos_data, neg_data):
        samples.append(chunk)
    for chunk in (pos_labels, neg_labels):
        targets.append(chunk)

    if not shuffle:
        return samples.numpy(), targets.numpy()

    sample_tensor = samples.tensor()
    target_tensor = targets.tensor()
    # A single permutation indexes both tensors, keeping rows and labels paired.
    order = torch.randperm(len(sample_tensor))
    return sample_tensor[order].numpy(), target_tensor[order].numpy()
示例#11
0
    def test_tensor_list(self):
        """``to_list()`` returns the source list's type with element-wise equal tensors."""
        source_tensors = [torch.zeros(1, TENSOR_EMBEDDING_DIM) for _ in range(3)]
        tensor_list = TensorList(tensor_list=source_tensors)
        round_tripped = tensor_list.to_list()

        assert type(round_tripped) == type(source_tensors)
        assert all(
            (expected == actual).all()
            for expected, actual in zip(source_tensors, round_tripped)
        )
示例#12
0
    def test_extend(self):
        """Extending with three more (1, dim) rows grows length and leading shape from 3 to 6."""
        def _zero_rows():
            # Fresh tensors on every call so the two batches share no objects.
            return [torch.zeros(1, TENSOR_EMBEDDING_DIM) for _ in range(3)]

        tensor_list = TensorList(tensor_list=_zero_rows())
        assert len(tensor_list) == 3
        assert tensor_list.shape == (3, TENSOR_EMBEDDING_DIM)

        tensor_list.extend(_zero_rows())
        assert len(tensor_list) == 6
        assert tensor_list.shape == (6, TENSOR_EMBEDDING_DIM)
示例#13
0
    def test_contains_tensor(self):
        """``contains`` returns 0 for an all-zero row and -1 for an all-ones row."""
        zero_rows = [torch.zeros(1, TENSOR_EMBEDDING_DIM) for _ in range(3)]
        tensor_list = TensorList(tensor_list=zero_rows)

        present_row = torch.zeros(1, TENSOR_EMBEDDING_DIM)
        assert tensor_list.contains(present_row) == 0

        absent_row = torch.zeros(1, TENSOR_EMBEDDING_DIM) + 1
        assert tensor_list.contains(absent_row) == -1
示例#14
0
 def _batch_predict(self, features: List[List[torch.Tensor]]) -> List[int]:
     """Predict one label per entry of ``features`` with the linear model.

     Each entry is reduced with ``self.feature_summarizer`` and converted
     to numpy; the summaries go through ``TensorList(...).numpy()`` into
     ``self.linear_model.predict``. The predictions are unpacked via
     ``TensorList([...]).to_list()`` and each converted with ``.item()``.
     """
     summaries: List[np.ndarray] = [
         self.feature_summarizer(feature_group).numpy()
         for feature_group in features
     ]
     batch_array: np.ndarray = TensorList(summaries).numpy()
     predicted: np.ndarray = self.linear_model.predict(batch_array)
     return [label.item() for label in TensorList([predicted]).to_list()]
示例#15
0
 def _batch_probabilities(self, features: List[List[torch.Tensor]]) -> List[float]:
     """Return one decision-function score per entry of ``features``.

     Each entry is reduced with ``self.feature_summarizer`` and converted
     to numpy; the summaries go through ``TensorList(...).numpy()`` into
     ``self.linear_model.decision_function``. The scores are unpacked via
     ``TensorList([...]).to_list()`` and each converted with ``.item()``.
     """
     summaries: List[np.ndarray] = [
         self.feature_summarizer(feature_group).numpy()
         for feature_group in features
     ]
     batch_array: np.ndarray = TensorList(summaries).numpy()
     scores: np.ndarray = self.linear_model.decision_function(batch_array)
     return [score.item() for score in TensorList([scores]).to_list()]
示例#16
0
 def concat(cls, features: List[torch.Tensor]) -> torch.Tensor:
     """Pass ``features`` through a TensorList and reshape the resulting tensor to one row."""
     combined = TensorList(features).tensor()
     return combined.reshape(1, -1)
示例#17
0
 def test_empty_construct(self):
     """A TensorList built with no arguments has length 0 and shape ``(0,)``."""
     empty_list = TensorList()
     assert len(empty_list) == 0
     assert empty_list.shape == (0, )