def test_predict(self):
    """Tag the IOB data read from stdin and print one prediction per token.

    All of stdin is read up front and handed to an ``IOBReader``. A
    ``Classifier`` is built over it with the FORM, POS and capitalisation
    extractors and a ``LinearEstimator``, and ``classifier.predict()`` is
    called. For every non-blank input line the first two
    whitespace-separated columns are written to stdout followed by the
    predicted label, tab-separated. Blank input lines are echoed as empty
    lines and consume no prediction.

    Raises:
        IndexError: if a non-blank line has fewer than two columns, or if
            ``predict()`` returned fewer predictions than there are
            non-blank lines.
        KeyError: if a predicted index is missing from the LABEL vocabulary.
    """
    lines = sys.stdin.readlines()
    reader = IOBReader(lines)
    extractors = [
        FieldExtractor(reader.getPosition('FORM')),
        FieldExtractor(reader.getPosition('POS')),
        CapitalExtractor(reader.getPosition('FORM')),
    ]
    # NOTE(review): 'model.ckpt' is presumably the checkpoint written by
    # test_train -- confirm against Classifier.
    params = {
        'epochs': 25,
        'learning_rate': 0.01,
        'window_size': 3,
        'name_model': 'model.ckpt',
    }
    classifier = Classifier(reader, extractors, LinearEstimator, **params)
    predicted = classifier.predict()

    # NOTE(review): this method asserts nothing; it only prints predictions.
    # TODO: check len(predicted) against the number of non-blank lines.

    # The vocabulary maps label -> index; invert it to decode predictions.
    label_vocabulary = reader.vocabulary[reader.getPosition('LABEL')]
    labels_idx_rev = {index: label for label, index in label_vocabulary.items()}

    # sys.stdout.write is used instead of the Python-2-only print statement
    # so the method parses on both Python 2 and Python 3; the bytes written
    # are the same.
    token_idx = 0
    for line in lines:
        columns = line.split()  # split once; empty for blank lines
        if not columns:
            sys.stdout.write('\n')
            continue
        sys.stdout.write('%s\t%s\t%s\n' % (
            columns[0],
            columns[1],
            labels_idx_rev[predicted[token_idx]],
        ))
        token_idx += 1
def test_train(self):
    """Train a linear classifier on the tab-separated IOB data from stdin.

    stdin is passed directly to ``IOBReader`` with an explicit three-column
    format (FORM, POS, LABEL, all ``str``) and a tab separator. A
    ``Classifier`` is then built with the FORM, POS and capitalisation
    extractors and a ``LinearEstimator``, and ``classifier.train()`` is
    called. Nothing is asserted or returned.
    """
    column_names = ('FORM', 'POS', 'LABEL')
    iob_format = {
        'fields': [
            {'position': column, 'name': column_name, 'type': str}
            for column, column_name in enumerate(column_names)
        ]
    }
    reader = IOBReader(sys.stdin, separator='\t', format=iob_format)

    # (extractor class, field name) pairs, resolved in declaration order.
    extractor_specs = (
        (FieldExtractor, 'FORM'),
        (FieldExtractor, 'POS'),
        (CapitalExtractor, 'FORM'),
    )
    extractors = [
        extractor_cls(reader.getPosition(field_name))
        for extractor_cls, field_name in extractor_specs
    ]

    classifier = Classifier(
        reader,
        extractors,
        LinearEstimator,
        epochs=25,
        learning_rate=0.01,
        window_size=3,
        name_model='model.ckpt',
    )
    classifier.train()