示例#1
0
 def output_sentence(sent):
     """Write one output line per token of ``sent`` to ``data_out``.

     Every line consists of the normalized label, a quoted tag of the form
     ``'<name>-<sent_i>-<token_i>|`` (``token_i`` is 1-based) and the
     space-joined features of the token.  Labels are read from the ``cpos``
     column when the ``--coarse`` option is set, otherwise from ``pos``.

     ``args``, ``sent_i`` and ``data_out`` are not defined in this function;
     they are resolved from the surrounding scope.
     """
     tag_column = 'cpos' if args['--coarse'] else 'pos'
     words = sent['word']
     for idx in range(len(words)):
         tags = sent[tag_column]
         header = "{label} '{name}-{sent_i}-{token_i}|".format(
             label=normalize_label(tags[idx]),
             name=args['--name'],
             sent_i=sent_i,
             token_i=idx + 1)
         # Trailing comma: no newline yet, so the features printed next end
         # up on the same output line as the header.
         print >>data_out, header,
         print >>data_out, u" ".join(features_for_token(words, tags, idx))
    def output_sentence(sent):
        """Write one output line per token of ``sent`` to ``data_out``.

        Two layouts are produced, selected by the ``--feature-set`` option:

        * ``"dependency"``: the label comes from ``sent['dependency']``, the
          tag is ``'<sent_i>-<token_i>`` with a 0-based ``token_i``, and the
          joined features are appended directly after the tag (no separator
          is added here).  Tokens for which an ``IndexError`` is raised while
          building or printing the line are skipped silently.
        * anything else: the label comes from the ``cpos`` column (when
          ``--coarse`` is set) or the ``pos`` column, the tag is
          ``'<name>-<sent_i>-<token_i>|`` with a 1-based ``token_i``, and the
          features follow on the same line.

        ``args``, ``sent_i`` and ``data_out`` are not defined in this
        function; they are resolved from the surrounding scope.
        """
        tag_column = 'cpos' if args['--coarse'] else 'pos'
        dependency_mode = args['--feature-set'] == "dependency"

        for idx in xrange(len(sent["word"])):
            if dependency_mode:
                try:
                    line = u"{label} '{sent_i}-{token_i}{features}".format(
                        label=normalize_label(sent['dependency'][idx]),
                        sent_i=sent_i,
                        token_i=idx,
                        features=u" ".join(
                            features_for_token(sent['word'], sent[tag_column], idx)))
                    print >>data_out, line
                except IndexError:
                    # We need this check for sentences we skip.
                    continue
            else:
                header = "{label} '{name}-{sent_i}-{token_i}|".format(
                    label=normalize_label(sent[tag_column][idx]),
                    name=args['--name'],
                    sent_i=sent_i,
                    token_i=idx + 1)
                # Trailing comma: no newline yet, so the features printed
                # next end up on the same output line as the header.
                print >>data_out, header,
                print >>data_out, u" ".join(
                    features_for_token(sent['word'], sent[tag_column], idx))
示例#3
0
def write_sentences_to_file(sents, data_out, name='brown'):
    """Write tagged sentences to ``data_out``, one token per line.

    Each token line consists of the normalized tag, a quoted identifier of
    the form ``'<name>-<sent_i>-<token_i>|`` (both indices are 0-based) and
    the space-joined features returned by ``honnibal13`` for that token.
    Consecutive sentences are separated by one empty line.

    Args:
        sents: iterable of sentences; each sentence is an iterable of
            ``(token, pos)`` pairs.
        data_out: writable file-like object receiving the output.
        name: prefix used in every token identifier.  Defaults to
            ``'brown'``, the value that was previously hard-coded.
    """
    for sent_i, sent in enumerate(sents):
        # Materialize once so a sentence may be any iterable of pairs,
        # not only a sequence that can be iterated twice and measured.
        pairs = list(sent)
        tokens = [token for token, _ in pairs]
        tags = [pos for _, pos in pairs]

        # Blank separator line before every sentence except the first.
        if sent_i > 0:
            print >> data_out, ""

        for token_i, tag in enumerate(tags):
            # Trailing comma: no newline yet, so the features printed next
            # end up on the same output line as the identifier.
            print >> data_out, "{label} '{name}-{sent_i}-{token_i}|".format(
                label=normalize_label(tag),
                name=name,
                sent_i=sent_i,
                token_i=token_i),

            print >> data_out, u" ".join(honnibal13(tokens, tags, token_i))