Exemplo n.º 1
0
    def setup_class(cls):
        """Build the shared fixture: one document holding two labelled parts.

        Each part contains a single sentence. Every token of that sentence
        gets ``predicted_labels`` set to a one-element list wrapping the
        matching tag string in a ``Label``.
        """

        def labelled_part(text, spans, tags):
            # spans: (surface form, start offset) pairs in sentence order;
            # tags: one predicted tag string per span.
            piece = Part(text)
            piece.sentences = [[Token(word, start) for word, start in spans]]
            for token, tag in zip(piece.sentences[0], tags):
                token.predicted_labels = [Label(tag)]
            return piece

        cls.dataset = Dataset()

        first_part = labelled_part(
            'some text c.A100G p.V100Q some text',
            [
                ('some', 0), ('text', 5),
                ('c', 10), ('.', 11), ('A', 12), ('100', 13), ('G', 16),
                ('p', 18), ('.', 19), ('V', 20), ('100', 21), ('Q', 24),
                ('some', 26), ('text', 31)
            ],
            [
                'O', 'O',
                'B', 'I', 'I', 'I', 'E',
                'A', 'I', 'I', 'I', 'E',
                'O', 'O'
            ]
        )

        document = Document()
        cls.dataset.documents['doc_1'] = document
        document.parts['p1'] = first_part

        second_part = labelled_part(
            'test edge case DNA A927B test',
            [
                ('test', 0), ('edge', 5), ('case', 10), ('DNA', 15),
                ('A', 19), ('927', 20), ('B', 23),
                ('test', 25)
            ],
            ['O', 'O', 'O', 'O', 'M', 'P', 'M', 'O']
        )

        cls.dataset.documents['doc_1'].parts['p2'] = second_part
Exemplo n.º 2
0
    def setUp(self):
        """Create a one-part, two-sentence dataset and a Porter stem generator."""
        # Each inner tuple is one sentence of (word, start offset) pairs.
        layout = (
            (('Make', 0), ('making', 5), ('made', 12)),
            (('Try', 18), ('tried', 22), ('tries', 28)),
        )

        part = Part('Make making made. Try tried tries.')
        part.sentences = [
            [Token(word, offset) for word, offset in sentence]
            for sentence in layout
        ]

        self.dataset = Dataset()
        document = Document()
        self.dataset.documents['doc_1'] = document
        document.parts['part_1'] = part

        self.generator = PorterStemFeatureGenerator()
Exemplo n.º 3
0
    def setUp(self):
        """Create a one-part, two-sentence dataset whose tokens carry features 'a' and 'b'."""
        # Each inner tuple is one sentence of (word, start offset) pairs.
        layout = (
            (('Make', 0), ('making', 5), ('made', 12)),
            (('Try', 18), ('tried', 22), ('tries', 28)),
        )

        part = Part('Make making made. Try tried tries.')
        part.sentences = [
            [Token(word, offset) for word, offset in sentence]
            for sentence in layout
        ]

        self.dataset = Dataset()
        document = Document()
        self.dataset.documents['doc_1'] = document
        document.parts['part_1'] = part

        # Every token gets two features whose value equals the key.
        for token in self.dataset.tokens():
            for key in ('a', 'b'):
                token.features[key] = key
Exemplo n.º 4
0
    def setUp(self):
        """Create a one-part, two-sentence dataset plus the two generators used by the tests."""
        # Each inner tuple is one sentence of (word, start offset) pairs.
        layout = (
            (('Word1', 0), ('word2', 6), ('word3', 12)),
            (('Word4', 19), ('word5', 25), ('word6', 31)),
        )

        part = Part('Word1 word2 word3. Word4 word5 word6.')
        part.sentences = [
            [Token(word, offset) for word, offset in sentence]
            for sentence in layout
        ]

        self.dataset = Dataset()
        document = Document()
        self.dataset.documents['doc_1'] = document
        document.parts['part_1'] = part

        self.simple_generator = SimpleFeatureGenerator()
        self.sentence_generator = SentenceMarkerFeatureGenerator()