Exemplo n.º 1
0
def extract_features(inputfile, outputfile):
    """Run the feature extraction from ``inputfile`` into ``outputfile``.

    ``inputfile`` is opened for reading and ``outputfile`` for writing (both
    in text mode); the two handles are passed to ``crfutils.main`` as ``fi``
    and ``fo`` together with the module-level ``feature_extractor``,
    ``fields`` and ``separator``.

    Args:
        inputfile: Path of the file to read.
        outputfile: Path of the file to (over)write.
    """
    # The ``with`` statement closes both files even if crfutils.main raises,
    # and closes the input if opening the output fails.
    with open(inputfile, 'r') as fi, open(outputfile, 'w') as fo:
        crfutils.main(feature_extractor,
                      fields=fields,
                      sep=separator,
                      fi=fi,
                      fo=fo)
    v['pref'] = a[0:3].encode('utf-8')
    #else:
    #	v['pref'] = ''
    if len(a) <= 3:
        v['len'] = '1'
    else:
        v['len'] = '0'
    if c == 0:
        v['fir'] = '1'
    else:
        v['fir'] = '0'


def feature_extractor(X):
    """Fill the items of sequence ``X`` with attributes.

    Calls ``observation(item, position)`` for every item (positions start at
    0), applies the module-level ``templates`` through
    ``crfutils.apply_templates`` and, when ``X`` is non-empty, appends the
    ``__BOS__`` marker to the first item's ``'F'`` list and ``__EOS__`` to
    the last item's.
    """
    for position, item in enumerate(X):
        observation(item, position)

    # Apply attribute templates to obtain features (in fact, attributes)
    crfutils.apply_templates(X, templates)

    if not X:
        return

    # Boundary markers are added by hand after the templates were applied.
    X[0]['F'].append('__BOS__')
    X[-1]['F'].append('__EOS__')


# Script entry point: call crfutils.main with this module's extractor and
# its module-level field/separator settings.
if __name__ == '__main__':
    crfutils.main(
        feature_extractor,
        fields=fields,
        sep=separator,
    )
Exemplo n.º 3
0
    features = OrderedDict()
    for i in feature_set:
        features[i[0]] = i[1]

    for w, _ in features.items():
        if w in trigrams:
            features[w] = [[[w, -1]], [[w, 0]], [[w, 1]]]

    feature_keys = features.keys()
    feature_items = features.values()
    input_columns = ' '.join(feature_keys) + ' chunk y'

    attribute_templates = []
    for i in feature_items:
        attribute_templates += i

    print("Using features: {}.".format(str(attribute_templates)))

    feature_extractor = lambda x: crfutils.apply_templates(x, attribute_templates)

    for fi, txt in [(train_csv, "train"), (devel_csv, "devel"), (test_csv, "test")]:
        write_to = path.join(trigram_path, txt + "_trigrams_" + str(mode) + ".crfsuite")
        fo = open(write_to, "w+")
        print("Writing to {}...".format(write_to))
        crfutils.main(feature_extractor, fi, fo, fields=input_columns, sep='\t')
        fo.close()

# Close the train, devel and test inputs, in that order.
# NOTE(review): these names are bound outside the visible snippet;
# presumably they are open file objects - confirm against the opening code.
train_csv.close()
devel_csv.close()
test_csv.close()
Exemplo n.º 4
0
    (('w',  2), ),
    (('w', -1), ('w',  0)),
    (('w',  0), ('w',  1)),
    (('pos', -2), ),
    (('pos', -1), ),
    (('pos',  0), ),
    (('pos',  1), ),
    (('pos',  2), ),
    (('pos', -2), ('pos', -1)),
    (('pos', -1), ('pos',  0)),
    (('pos',  0), ('pos',  1)),
    (('pos',  1), ('pos',  2)),
    (('pos', -2), ('pos', -1), ('pos',  0)),
    (('pos', -1), ('pos',  0), ('pos',  1)),
    (('pos',  0), ('pos',  1), ('pos',  2)),
    )


import crfutils

def feature_extractor(X):
    """Attach template attributes and boundary markers to sequence ``X``.

    The module-level ``templates`` are applied with
    ``crfutils.apply_templates``; for a non-empty ``X`` the first item's
    ``'F'`` list then receives ``__BOS__`` and the last item's ``__EOS__``.
    """
    # Apply attribute templates to obtain features (in fact, attributes)
    crfutils.apply_templates(X, templates)

    if not X:
        return

    # Append BOS and EOS features manually
    X[0]['F'].append('__BOS__')
    X[-1]['F'].append('__EOS__')

# Script entry point: call crfutils.main with this module's extractor and
# its module-level field/separator settings.
if __name__ == '__main__':
    crfutils.main(
        feature_extractor,
        fields=fields,
        sep=separator,
    )
Exemplo n.º 5
0
def generate(infile, outfile):
    """Generate features for ``infile`` and write them to ``outfile``.

    Writes the progress message ``Generating features.`` to stderr, then
    calls ``crfutils.main`` with the module-level ``feature_extractor``,
    ``fields`` and ``separator`` plus the two opened files (input opened for
    reading, output for writing), all passed positionally.

    Args:
        infile: Path of the file to read.
        outfile: Path of the file to (over)write.
    """
    sys.stderr.write("Generating features.\n")
    # Context managers guarantee both handles are flushed and closed, also
    # when crfutils.main raises; the inline open() calls used before were
    # never closed explicitly.
    with open(infile, 'r') as fi, open(outfile, 'w') as fo:
        crfutils.main(feature_extractor, fields, separator, fi, fo)
Exemplo n.º 6
0
    return features


def Featurizer(X):
    """Append dictionary and orthographic features to every item of ``X``.

    ``X`` is indexed as a sequence of mappings that provide a ``'w'`` value
    and an ``'F'`` list.  For each position, the result of
    ``DF.GetDictFeatures(all_words, position)`` concatenated with
    ``GetOrthographicFeatures(word)`` is formatted item by item with
    ``'%s'`` and appended to that item's ``'F'`` list.  Nothing happens for
    a falsy ``X``.

    ``DF`` is read from module scope (the ``__main__`` block assigns it).
    """
    if not X:
        return

    sent = [item['w'] for item in X]

    for position, item in enumerate(X):
        word = item['w']
        extra = DF.GetDictFeatures(sent, position) + GetOrthographicFeatures(word)
        for feat in extra:
            item['F'].append('%s' % feat)


def FeatureExtractor(X):
    """Build all attributes for sequence ``X``.

    Applies the module-level ``templates`` with
    ``crfutils.apply_templates``, lets ``Featurizer`` add its features and,
    for a non-empty ``X``, appends ``__BOS__`` to the first item's ``'F'``
    list and ``__EOS__`` to the last item's.
    """
    crfutils.apply_templates(X, templates)
    Featurizer(X)

    if not X:
        return

    X[0]['F'].append('__BOS__')   # BOS feature
    X[-1]['F'].append('__EOS__')  # EOS feature


if __name__ == '__main__':
    # DF must be bound at module level before crfutils.main runs, because
    # Featurizer reads it as a global.
    DF = DictionaryFeatures("./lexicon")
    crfutils.main(
        FeatureExtractor,
        fields=fields,
        sep=separator,
    )
Exemplo n.º 7
0
def generate(infile, outfile):
    """Generate features for ``infile`` and write them to ``outfile``.

    Writes the progress message ``Generating features.`` to stderr, then
    calls ``crfutils.main`` with the module-level ``feature_extractor``,
    ``fields`` and ``separator`` plus the two opened files (input opened for
    reading, output for writing), all passed positionally.

    Args:
        infile: Path of the file to read.
        outfile: Path of the file to (over)write.
    """
    sys.stderr.write("Generating features.\n")
    # Context managers guarantee both handles are flushed and closed, also
    # when crfutils.main raises; the inline open() calls used before were
    # never closed explicitly.
    with open(infile, 'r') as fi, open(outfile, 'w') as fo:
        crfutils.main(feature_extractor, fields, separator, fi, fo)
def extract_features(inputfile, outputfile):
    """Run the feature extraction from ``inputfile`` into ``outputfile``.

    ``inputfile`` is opened for reading and ``outputfile`` for writing (both
    in text mode); the two handles are passed to ``crfutils.main`` as ``fi``
    and ``fo`` together with the module-level ``feature_extractor``,
    ``fields`` and ``separator``.

    Args:
        inputfile: Path of the file to read.
        outputfile: Path of the file to (over)write.
    """
    # The ``with`` statement closes both files even if crfutils.main raises,
    # and closes the input if opening the output fails.
    with open(inputfile, 'r') as fi, open(outputfile, 'w') as fo:
        crfutils.main(feature_extractor, fields=fields, sep=separator,
                      fi=fi, fo=fo)
Exemplo n.º 9
0
def main(argv):
    """Call ``crfutils.main`` with fields ``'w y'`` and a tab separator.

    ``argv`` is accepted but not used by this function.
    """
    crfutils.main(
        feature_extractor,
        fields='w y',
        sep='\t',
    )
Exemplo n.º 10
0
def Featurizer(X):
    """Append dictionary and orthographic features to every item of ``X``.

    ``X`` is indexed as a sequence of mappings that provide a ``'w'`` value
    and an ``'F'`` list.  For each index, the result of
    ``DF.GetDictFeatures(words, index)`` concatenated with
    ``GetOrthographicFeatures(word)`` is formatted item by item with
    ``'%s'`` and appended to that item's ``'F'`` list.  A falsy ``X`` is
    left untouched.

    ``DF`` is read from module scope (the ``__main__`` block assigns it).
    """
    if not X:
        return

    # The complete word sequence is handed to the dictionary lookup.
    words = [token['w'] for token in X]

    for index, token in enumerate(X):
        current = token['w']
        found = DF.GetDictFeatures(words, index) + GetOrthographicFeatures(current)
        for feature in found:
            token['F'].append('%s' % feature)

def FeatureExtractor(X):
    """Build all attributes for sequence ``X``.

    Applies the module-level ``templates`` with
    ``crfutils.apply_templates``, lets ``Featurizer`` add its features and,
    for a non-empty ``X``, appends ``__BOS__`` to the first item's ``'F'``
    list and ``__EOS__`` to the last item's.
    """
    crfutils.apply_templates(X, templates)
    Featurizer(X)

    if not X:
        return

    X[0]['F'].append('__BOS__')   # BOS feature
    X[-1]['F'].append('__EOS__')  # EOS feature

if __name__ == '__main__':
    # DF must be bound at module level before crfutils.main runs, because
    # Featurizer reads it as a global.
    DF = DictionaryFeatures("./lexicon")
    crfutils.main(
        FeatureExtractor,
        fields=fields,
        sep=separator,
    )
Exemplo n.º 11
0
    (('w', -2), ),
    (('w', -1), ),
    (('w',  0), ),
    (('w',  1), ),
    (('w',  2), ),
    (('w', -1), ('w',  0)),
    (('w',  0), ('w',  1)),
    (('pos', -2), ),
    (('pos', -1), ),
    (('pos',  0), ),
    (('pos',  1), ),
    (('pos',  2), ),
    (('pos', -2), ('pos', -1)),
    (('pos', -1), ('pos',  0)),
    (('pos',  0), ('pos',  1)),
    (('pos',  1), ('pos',  2)),
    (('pos', -2), ('pos', -1), ('pos',  0)),
    (('pos', -1), ('pos',  0), ('pos',  1)),
    (('pos',  0), ('pos',  1), ('pos',  2)),
    )

def feature_extractor(X):
    """Attach template attributes and boundary markers to sequence ``X``.

    The module-level ``templates`` are applied with ``template.apply``; for
    a non-empty ``X`` the first item's ``'F'`` list then receives
    ``__BOS__`` and the last item's ``__EOS__``.
    """
    template.apply(X, templates)

    if not X:
        return

    X[0]['F'].append('__BOS__')
    X[-1]['F'].append('__EOS__')

# Script entry point: call crfutils.main with a 'w pos y' field layout and
# a single space as separator.
if __name__ == '__main__':
    crfutils.main(
        feature_extractor,
        fields='w pos y',
        sep=' ',
    )

Exemplo n.º 12
0
    (('pos',  2), ),
    (('pos', -2), ('pos', -1)),
    (('pos', -1), ('pos',  0)),
    (('pos',  0), ('pos',  1)),
    (('pos',  1), ('pos',  2)),
    (('pos', -2), ('pos', -1), ('pos',  0)),
    (('pos', -1), ('pos',  0), ('pos',  1)),
    (('pos',  0), ('pos',  1), ('pos',  2)),
    )

import re
# Attribute templates: a tuple of templates, each a tuple of
# (field, offset, compiled pattern) triples.
# NOTE(review): how crfutils.apply_templates interprets the third element
# is not visible here - presumably the pattern is matched against the field
# value at that offset; confirm against crfutils.
templates = (
    # 'w' at offset 0 with a two-digit pattern, 'w' at offset 1 with '年'.
    (
        ('w', 0, re.compile(r'^\d{2}$')),
        ('w', 1, re.compile(r'年')),
    ),
    # Same pair of patterns, shifted one position back (offsets -1 and 0).
    (
        ('w', -1, re.compile(r'^\d{2}$')),
        ('w', 0, re.compile(r'年')),
    ),
)

import crfutils

def feature_extractor(X):
    """Apply the module-level attribute ``templates`` to sequence ``X``.

    Only ``crfutils.apply_templates`` is called; this extractor does not
    append ``__BOS__``/``__EOS__`` markers.
    """
    # Apply attribute templates to obtain features (in fact, attributes)
    crfutils.apply_templates(X, templates)
    return None

# Script entry point: the first command-line argument is passed to
# crfutils.main as ``input_file``.
if __name__ == '__main__':
    import sys

    input_file = sys.argv[1]
    crfutils.main(
        feature_extractor,
        input_file=input_file,
        fields=fields,
        sep=separator,
    )