示例#1
0
def nonlinearity(tensor, xs, ys):
    """Apply a piecewise-linear function, elementwise, to ``tensor``.

    The curve is defined by control points ``(xs[i], ys[i])``; input values
    outside ``[xs[0], xs[-1]]`` saturate at ``ys[0]`` / ``ys[-1]`` because of
    the clamp below.

    Args:
        tensor: input tensor of any shape.
        xs: breakpoint x-coordinates (sequence); assumed ascending — TODO confirm.
        ys: breakpoint y-values; must be the same length as ``xs``.

    Returns:
        A tensor with the same shape (and device) as ``tensor``.
    """
    assert len(xs) == len(ys)

    # Baseline at the leftmost level; ones_like already matches the input's
    # device, so no explicit device kwarg is needed.
    output = torch.ones_like(tensor) * ys[0]

    # Walk consecutive breakpoint pairs via slicing. With a single control
    # point this loop simply runs zero times (the windowed() version padded
    # the pair with None and crashed on the arithmetic).
    for x1, x2, y1, y2 in zip(xs, xs[1:], ys, ys[1:]):
        # Each segment adds its clamped linear contribution on top.
        output += (y2 - y1) / (x2 - x1) * (torch.clamp(tensor, x1, x2) - x1)

    return output
示例#2
0
def nonlinearity(tensor, xs, ys):
    """Evaluate a piecewise-linear curve elementwise over ``tensor``.

    The curve passes through the control points ``(xs[i], ys[i])``; values of
    ``tensor`` below ``xs[0]`` or above ``xs[-1]`` are clamped, so the output
    saturates at the endpoint levels.

    Args:
        tensor: input values, any shape; output has the same shape.
        xs: breakpoint x-coordinates (sequence), assumed sorted ascending —
            TODO confirm with callers.
        ys: breakpoint y-values, same length as ``xs``.

    Returns:
        Tensor of the same shape and device as ``tensor``.
    """
    # TODO: This could probably be sped up quite a bit with torch.search_sorted
    assert len(xs) == len(ys)

    # Start from the leftmost level (ones_like preserves device/dtype, so
    # the explicit device kwarg was redundant and has been dropped).
    output = torch.ones_like(tensor) * ys[0]

    # Pair consecutive breakpoints by slicing instead of windowed(); this is
    # stdlib-only and, unlike windowed()'s None padding, is a no-op when only
    # one control point is given rather than a TypeError.
    for x1, x2, y1, y2 in zip(xs, xs[1:], ys, ys[1:]):
        output += (y2 - y1) / (x2 - x1) * (torch.clamp(tensor, x1, x2) - x1)

    return output
示例#3
0
    def _words_per_seg(self, lengths, labels):
        """Count how many words fall inside each predicted segment."""
        # Positions of the boundary segmentations (where the 1s are).
        boundary_positions = self._boundary_ids(labels)

        # Sum the per-sentence word counts between consecutive boundaries.
        counts = []
        for start, stop in windowed(boundary_positions, 2):
            counts.append(sum(lengths[start:stop]))

        # Zero segmentations predicted: everything is one big segment.
        if not counts:
            return [sum(lengths)]

        return counts
示例#4
0
def compute_idx_spans(sentences, L=10):
    """Compute span indexes for all spans of length 1 through L-1 in each
    sentence (the upper bound L itself is exclusive). Indexes are offset
    so they are global across the concatenated sentences."""
    idx_spans = []
    shift = 0
    for sent in sentences:
        # Global token positions covered by this sentence.
        token_range = range(shift, shift + len(sent))
        per_length = [windowed(token_range, width) for width in range(1, L)]
        idx_spans.extend(flatten(per_length))
        shift += len(sent)

    return idx_spans
示例#5
0
 def reravel(self):
     """ Group sentences back into subsections
     (Used in Hearst baseline) """
     # windowed(self.indexes, 2) yields consecutive (start, stop) pairs.
     # The original computed it twice and discarded the first result
     # (dead local `intervals`); a single call suffices.
     return [self.sents[start:stop] for start, stop in windowed(self.indexes, 2)]
示例#6
0
 def spans_in_sentence(sent, length, shift):
     """Return all index windows of `length` over `sent`, offset by `shift`."""
     stop = len(sent) + shift
     return windowed(range(shift, stop), length)
def _indexes(aligned):
    """ Return tuples of (start, stop) indexes for documents """
    # Positions of top-level segment markers, plus a sentinel at the end
    # so the final document also gets a (start, stop) pair.
    starts = [
        position
        for position, (title, _) in enumerate(aligned)
        if title == 'TOP-LEVEL SEGMENT'
    ]
    starts.append(len(aligned))
    return windowed(starts, 2)
示例#8
0
 def ngrams(self, n=1):
     """Yield underscore-joined n-grams over self.tokens."""
     yield from ('_'.join(gram) for gram in windowed(self.tokens, n))
示例#9
0
def compute_idx_spans(tokens, L=10):
    """ Compute all token spans with lengths 1 through L-1
    (the upper bound L is exclusive) """
    token_indexes = range(len(tokens))
    per_length = [windowed(token_indexes, width) for width in range(1, L)]
    return flatten(per_length)
示例#10
0
def pair(spans):
    """Return consecutive overlapping (current, next) pairs of spans."""
    paired = windowed(spans, 2)
    return paired