def test_seq_features():
    # Sequence-level feature counts from features.seqFeatures, using the
    # word-only feature function, must match the hand-built dictionary below.
    gold_tags = ['N', 'V', 'V', 'N']
    expected = {
        (EMIT, 'N', 'fish'): 1.0,
        (EMIT, 'V', 'can'): 2.0,
        (OFFSET, 'V'): 2.0,
        (EMIT, 'N', 'they'): 1.0,
        (OFFSET, 'N'): 2.0,
        (OFFSET, END_TAG): 1.0,
    }
    actual = features.seqFeatures(sent, gold_tags, features.wordFeatures)
    # Build the failure message up front so both dictionaries are shown.
    failure_msg = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=failure_msg)
def oneItAvgStructPerceptron(inst_generator, featfunc, weights, wsum, tagset, Tinit=0):
    """Run one pass of averaged structured-perceptron training.

    Every sentence is tagged with ``viterbiTagger`` under the current
    weights. When the predicted tag sequence differs from the gold one, the
    gold sequence features are added to ``weights`` and the predicted
    sequence features are subtracted; ``wsum`` receives the same update
    scaled by the running counter ``Tinit + i``.

    :param inst_generator: An iterable of (words, tags) tuples
    :param featfunc: Feature function forwarded to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights (updated in place)
    :param wsum: A defaultdict of weight sums (updated in place)
    :param tagset: Tag set forwarded to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the token-level training accuracy
    :returns i: the zero-based index of the last instance (sentence) processed,
        i.e. one less than the number of instances seen
    :raises ZeroDivisionError: if no words were seen (e.g. an empty generator)
    """
    tr_err = 0.
    tr_tot = 0.
    for i, (words, y_true) in enumerate(inst_generator):
        y_pred, _ = viterbiTagger(words, featfunc, weights, tagset)
        if y_pred != y_true:
            # Global update counter used to weight the contribution to wsum.
            t = Tinit + i
            # .items() (not the Python-2-only .iteritems()) keeps this
            # runnable on both Python 2 and Python 3.
            for feat, value in seqFeatures(words, y_true, featfunc).items():
                wsum[feat] += t * value
                weights[feat] += value
            for feat, value in seqFeatures(words, y_pred, featfunc).items():
                wsum[feat] -= t * value
                weights[feat] -= value
            # Count token-level mistakes for the accuracy estimate.
            tr_err += sum(1 for gold, guess in zip(y_true, y_pred) if gold != guess)
        tr_tot += len(words)
    return weights, wsum, 1 - tr_err / tr_tot, i
def oneItAvgStructPerceptron(inst_generator, featfunc, weights, wsum, tagset, Tinit=0):
    """Run one pass of averaged structured-perceptron training.

    Every sentence is tagged with ``viterbiTagger`` under the current
    weights. On a mistake, the predicted sequence features are subtracted
    from ``weights`` and the gold sequence features are added; ``wsum``
    receives the same update scaled by the running counter ``Tinit + i``.

    :param inst_generator: An iterable of (words, tags) tuples
    :param featfunc: Feature function forwarded to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights (updated in place)
    :param wsum: A defaultdict of weight sums (updated in place)
    :param tagset: Tag set forwarded to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the token-level training accuracy
    :returns i: the zero-based index of the last instance (sentence) processed,
        i.e. one less than the number of instances seen
    :raises ZeroDivisionError: if no words were seen (e.g. an empty generator)
    """
    tr_err = 0.
    tr_tot = 0.
    for i, (words, y_true) in enumerate(inst_generator):
        pred = viterbiTagger(words, featfunc, weights, tagset)[0]
        # Only touch the weights on a mistake: for a correct prediction the
        # subtract/add pair cancels out, but would still cost two feature
        # extractions and insert zero-valued keys into the defaultdicts.
        if pred != y_true:
            t = Tinit + i  # global update counter for the running sum
            pred_feat = seqFeatures(words, pred, featfunc)
            true_feat = seqFeatures(words, y_true, featfunc)
            for key in pred_feat:
                weights[key] -= pred_feat[key]
                wsum[key] -= t * pred_feat[key]
            for key in true_feat:
                weights[key] += true_feat[key]
                wsum[key] += t * true_feat[key]
            # Count token-level mistakes for the accuracy estimate.
            tr_err += sum(1 for guess, gold in zip(pred, y_true) if guess != gold)
        tr_tot += len(words)
    return weights, wsum, 1 - tr_err / tr_tot, i
def oneItAvgStructPerceptron(inst_generator, featfunc, weights, wsum, tagset, Tinit=0):
    """Perform a single training pass of the averaged structured perceptron.

    For each (words, tags) instance the current weights are used by
    ``viterbiTagger`` to predict a tag sequence. If the prediction is not
    equal to the gold tags, ``weights`` is moved away from the predicted
    sequence features and towards the gold sequence features, and ``wsum``
    gets the same change multiplied by the counter ``Tinit + i``.

    :param inst_generator: An iterable of (words, tags) tuples
    :param featfunc: Feature function forwarded to viterbiTagger and seqFeatures
    :param weights: A defaultdict of weights (updated in place)
    :param wsum: A defaultdict of weight sums (updated in place)
    :param tagset: Tag set forwarded to viterbiTagger
    :param Tinit: the initial value of the $t$ counter at the beginning of this iteration
    :returns weights: a defaultdict of weights
    :returns wsum: a defaultdict of weight sums, for averaging
    :returns tr_acc: the token-level training accuracy
    :returns i: the zero-based index of the last instance (sentence) processed,
        i.e. one less than the number of instances seen
    :raises ZeroDivisionError: if no words were seen (e.g. an empty generator)
    """
    tr_err = 0.
    tr_tot = 0.
    for i, (words, y_true) in enumerate(inst_generator):
        pred = viterbiTagger(words, featfunc, weights, tagset)[0]
        tr_tot += len(words)
        if pred == y_true:
            # Correct prediction: the update would cancel out exactly, so
            # skip it rather than recompute features and add zero-valued
            # entries to the defaultdicts.
            continue
        step = Tinit + i  # global update counter for the running sum
        for key, value in seqFeatures(words, pred, featfunc).items():
            weights[key] -= value
            wsum[key] -= step * value
        for key, value in seqFeatures(words, y_true, featfunc).items():
            weights[key] += value
            wsum[key] += step * value
        # Count token-level mistakes for the accuracy estimate.
        tr_err += sum(1 for guess, gold in zip(pred, y_true) if guess != gold)
    return weights, wsum, 1 - tr_err / tr_tot, i
def test_seq_trans_features():
    # Sequence-level feature counts from features.seqFeatures, using the
    # word+transition feature function, must match the dictionary below.
    gold_tags = ['N', 'V', 'V', 'N']
    expected = {
        (TRANS, 'N', '--START--'): 1.0,
        (TRANS, '--END--', 'N'): 1.0,
        (EMIT, 'N', 'fish'): 1.0,
        (EMIT, 'V', 'can'): 2.0,
        (OFFSET, 'V'): 2.0,
        (EMIT, 'N', 'they'): 1.0,
        (TRANS, 'V', 'V'): 1.0,
        (TRANS, 'N', 'V'): 1.0,
        (OFFSET, 'N'): 2.0,
        (TRANS, 'V', 'N'): 1.0,
        (OFFSET, END_TAG): 1.0,
    }
    actual = features.seqFeatures(sent, gold_tags, features.wordTransFeatures)
    # Build the failure message up front so both dictionaries are shown.
    failure_msg = "UNEQUAL Expected:%s, Actual:%s" % (expected, actual)
    eq_(expected, actual, msg=failure_msg)