def output_sentence(sent):
    """Write one labelled, VW-style line per token of *sent* to the
    module-level ``data_out`` stream.

    Each token produces:
        "<label> '<name>-<sent_i>-<token_i>| <feature> <feature> ..."
    where the label comes from the coarse or fine POS column and the
    tag after the quote is a per-token identifier.

    Relies on module-level state: ``args`` (docopt-style dict),
    ``data_out`` (writable file object), ``sent_i`` (index of the
    current sentence — presumably set by the enclosing read loop;
    verify against the caller), plus the sibling helpers
    ``normalize_label`` and ``features_for_token``.

    NOTE(review): this definition is shadowed by the later
    ``output_sentence`` in this file, which reproduces this body as its
    non-dependency branch — if both live in the same module this copy
    is dead code.
    """
    # Coarse vs. fine POS tags, selected by the --coarse flag.
    label_key = 'cpos' if args['--coarse'] else 'pos'
    # xrange for consistency with the second output_sentence in this
    # file (Python 2: avoids materialising the full index list).
    for i in xrange(len(sent['word'])):
        # Header print ends with a trailing comma so the feature string
        # lands on the same output line (print's softspace inserts the
        # separating space).  token_i is 1-based here.
        print >>data_out, "{label} '{name}-{sent_i}-{token_i}|".format(
            label=normalize_label(sent[label_key][i]),
            name=args['--name'],
            sent_i=sent_i,
            token_i=i + 1),
        print >>data_out, u" ".join(
            features_for_token(sent['word'], sent[label_key], i))
def output_sentence(sent):
    """Write one labelled, VW-style line per token of *sent* to the
    module-level ``data_out`` stream.

    Two output formats, chosen by the --feature-set option:

    * ``dependency`` — label is the token's dependency relation; the
      per-token tag is "<sent_i>-<token_i>" with a 0-based token index,
      and the joined feature string is concatenated directly into the
      format (no explicit ``|`` here — presumably the features
      themselves carry the namespace marker; verify against
      ``features_for_token``).
    * anything else — label is the (coarse or fine) POS tag; the tag is
      "<name>-<sent_i>-<token_i>" with a 1-based token index followed by
      ``|``, and the features are printed by a second statement on the
      same output line (the trailing comma on the first print suppresses
      the newline; softspace supplies the separating space).

    Relies on module-level state: ``args``, ``data_out``, ``sent_i``
    (index of the current sentence — presumably set by the enclosing
    read loop; verify against the caller), ``normalize_label`` and
    ``features_for_token``.
    """
    # Coarse vs. fine POS tags, selected by the --coarse flag.
    label_key = 'cpos' if args['--coarse'] else 'pos'
    if args['--feature-set'] == "dependency":
        for i in xrange(len(sent["word"])):
            try:
                print >>data_out, u"{label} '{sent_i}-{token_i}{features}".format(
                    label=normalize_label(sent['dependency'][i]),
                    sent_i=sent_i,
                    token_i=i,
                    features=u" ".join(
                        features_for_token(sent['word'], sent[label_key], i)))
            # Deliberate skip: sentences filtered earlier can leave the
            # 'dependency' column shorter than 'word', so an IndexError
            # here means "skip this token", not a bug.
            except IndexError:
                continue
    else:
        for i in xrange(len(sent["word"])):
            # Trailing comma keeps the feature list on the same output
            # line; note the 1-based token index, unlike the branch above.
            print >>data_out, "{label} '{name}-{sent_i}-{token_i}|".format(
                label=normalize_label(sent[label_key][i]),
                name=args['--name'],
                sent_i=sent_i,
                token_i=i + 1),
            print >>data_out, u" ".join(
                features_for_token(sent['word'], sent[label_key], i))
def write_sentences_to_file(sents, data_out):
    """Emit every sentence in *sents* to *data_out*, one labelled
    VW-style line per token.

    Each sentence is an iterable of ``(token, pos)`` pairs.  Sentences
    after the first are preceded by a blank line.  Uses the sibling
    helpers ``normalize_label`` and ``honnibal13``.
    """
    header = "{label} '{name}-{sent_i}-{token_i}|"
    for sent_i, sent in enumerate(sents):
        # Unzip the (token, pos) pairs into parallel lists.
        tokens = []
        tags = []
        for token, pos in sent:
            tokens.append(token)
            tags.append(pos)
        # Blank separator line between sentences, not before the first.
        if sent_i:
            print >>data_out, ""
        for token_i in range(len(sent)):
            # Trailing comma keeps the feature string on the same output
            # line (softspace supplies the separating space).
            print >>data_out, header.format(
                label=normalize_label(tags[token_i]),
                name='brown',
                sent_i=sent_i,
                token_i=token_i),
            print >>data_out, u" ".join(honnibal13(tokens, tags, token_i))