Exemplo n.º 1
0
 def __init__(self, filename):
     """Load citation sequences from *filename* and split them into train/dev.

     Every group returned by ``fromSGML`` is read into memory and shuffled in
     place with ``np.random.shuffle``. The first fifth (integer division) of
     the shuffled list is passed to the parent constructor as ``dev``, the
     remainder as ``train``, and ``test`` is left empty. ``self.train`` and
     ``self.dev`` are then replaced by the results of ``make_instances``.

     NOTE(review): the split depends on the state of ``np.random`` at call
     time; seed it beforehand if a reproducible split is required.
     """
     self.Y = Alphabet()

     citations = list(
         fromSGML(filename, linegrouper="<NEW.*?>", bioencoding=False)
     )
     np.random.shuffle(citations)

     # Index separating the held-out (dev) prefix from the training suffix.
     cut = len(citations) // 5
     super(CoraCitations, self).__init__(
         train=citations[cut:],
         dev=citations[:cut],
         test=[],
     )

     # Presumably converts the raw splits stored by the parent class into
     # Instance objects -- confirm against make_instances.
     self.train = self.make_instances('train', Instance)
     self.dev = self.make_instances('dev', Instance)
Exemplo n.º 2
0
 def get_data(f):
     """Lazily yield one ``Instance`` per group that ``fromSGML`` reads from *f*.

     Each element of a group is unpacked as a two-item pair whose second item
     is wrapped in a ``Token`` and whose first item is kept as-is (presumably
     the label and the word, respectively). The resulting tuple of tokens is
     passed to ``preprocessing`` before being wrapped, together with the tuple
     of first items as ``truth``, in an ``Instance``.

     An empty group raises ``ValueError`` at the tuple unpacking step.
     """
     for group in fromSGML(f, linegrouper="<NEW.*?>", bioencoding=False):
         pairs = [(Token(word), label) for label, word in group]
         # Transpose the list of (token, label) pairs into two parallel tuples.
         tokens, labels = zip(*pairs)
         preprocessing(tokens)
         yield Instance(tokens, truth=labels)