def extract_features(inputfile, outputfile):
    """Run the feature extractor over ``inputfile`` and write to ``outputfile``.

    Both paths are opened in text mode and the resulting file objects are
    passed to ``crfutils.main`` as ``fi``/``fo`` together with the
    ``feature_extractor``, ``fields`` and ``separator`` names visible from
    this function's scope.

    Args:
        inputfile: Path of the file to read.
        outputfile: Path of the file to create or truncate.
    """
    # The context managers close both handles even when crfutils.main (or the
    # second open) raises, which the explicit close() calls did not guarantee.
    with open(inputfile, 'r') as fi, open(outputfile, 'w') as fo:
        crfutils.main(feature_extractor, fields=fields, sep=separator, fi=fi, fo=fo)
# NOTE(review): the statements down to the blank lines are the tail of a
# definition whose header precedes this chunk. They are reproduced unchanged;
# the indentation of the enclosing definition is not visible from here.
v['pref'] = a[0:3].encode('utf-8')
#else:
#    v['pref'] = ''
if len(a) <= 3:
    v['len'] = '1'
else:
    v['len'] = '0'
if c == 0:
    v['fir'] = '1'
else:
    v['fir'] = '0'


def feature_extractor(X):
    """Annotate every item of ``X`` and expand the attribute templates.

    ``observation`` is called on each item together with its zero-based
    position, then ``crfutils.apply_templates`` is run with ``templates``.
    When ``X`` is non-empty, ``'__BOS__'`` is appended to the ``'F'`` list of
    the first item and ``'__EOS__'`` to that of the last item.
    """
    for position, item in enumerate(X):
        observation(item, position)

    # Turn the attribute templates into features (in fact, attributes).
    crfutils.apply_templates(X, templates)

    if not X:
        return
    # Sequence-boundary markers are added by hand, first item then last item.
    for index, marker in ((0, '__BOS__'), (-1, '__EOS__')):
        X[index]['F'].append(marker)


if __name__ == '__main__':
    crfutils.main(feature_extractor, fields=fields, sep=separator)
# Map each feature name (first element of an entry in feature_set) to its
# template list (second element), keeping the order of feature_set.
features = OrderedDict()
for entry in feature_set:
    features[entry[0]] = entry[1]

# Features named in `trigrams` get one template per offset -1, 0 and +1.
# Iterating over a snapshot of the keys keeps the reassignment safe.
for name in list(features):
    if name in trigrams:
        features[name] = [[[name, -1]], [[name, 0]], [[name, 1]]]

feature_keys = features.keys()
feature_items = features.values()

# Column layout handed to crfutils.main: the feature names, then 'chunk', 'y'.
input_columns = ' '.join(feature_keys) + ' chunk y'

# Flatten the per-feature template lists into one list.
attribute_templates = []
for template_group in feature_items:
    attribute_templates.extend(template_group)
print("Using features: {}.".format(str(attribute_templates)))


def feature_extractor(x):
    """Apply ``attribute_templates`` to ``x`` and return the helper's result."""
    return crfutils.apply_templates(x, attribute_templates)


try:
    for fi, txt in [(train_csv, "train"), (devel_csv, "devel"), (test_csv, "test")]:
        write_to = path.join(trigram_path, txt + "_trigrams_" + str(mode) + ".crfsuite")
        # The context manager closes the output file even if crfutils.main
        # raises part-way through a split.
        with open(write_to, "w+") as fo:
            print("Writing to {}...".format(write_to))
            crfutils.main(feature_extractor, fi, fo, fields=input_columns, sep='\t')
finally:
    # The input handles were opened outside this view; release them whether
    # or not every split was written.
    train_csv.close()
    devel_csv.close()
    test_csv.close()
(('w', 2), ), (('w', -1), ('w', 0)), (('w', 0), ('w', 1)), (('pos', -2), ), (('pos', -1), ), (('pos', 0), ), (('pos', 1), ), (('pos', 2), ), (('pos', -2), ('pos', -1)), (('pos', -1), ('pos', 0)), (('pos', 0), ('pos', 1)), (('pos', 1), ('pos', 2)), (('pos', -2), ('pos', -1), ('pos', 0)), (('pos', -1), ('pos', 0), ('pos', 1)), (('pos', 0), ('pos', 1), ('pos', 2)), ) import crfutils def feature_extractor(X): # Apply attribute templates to obtain features (in fact, attributes) crfutils.apply_templates(X, templates) if X: # Append BOS and EOS features manually X[0]['F'].append('__BOS__') # BOS feature X[-1]['F'].append('__EOS__') # EOS feature if __name__ == '__main__': crfutils.main(feature_extractor, fields=fields, sep=separator)
def generate(infile, outfile):
    """Generate features from ``infile`` and write them to ``outfile``.

    A progress message is written to standard error, then both paths are
    opened in text mode and passed positionally to ``crfutils.main`` after
    ``feature_extractor``, ``fields`` and ``separator``.

    Args:
        infile: Path of the file to read.
        outfile: Path of the file to create or truncate.
    """
    sys.stderr.write("Generating features.\n")
    # The files used to be opened inline in the call and never closed; the
    # context managers release both handles (flushing the output) on every
    # exit path.
    with open(infile, 'r') as fi, open(outfile, 'w') as fo:
        crfutils.main(feature_extractor, fields, separator, fi, fo)
return features def Featurizer(X): global DF if X: sent = [] for t in range(len(X)): sent.append(X[t]['w']) for t in range(len(X)): w = X[t]['w'] feats = DF.GetDictFeatures(sent, t) + GetOrthographicFeatures(w) for f in feats: X[t]['F'].append('%s' % (f)) def FeatureExtractor(X): """apply attribute templates to obtain features (in fact, attributes)""" crfutils.apply_templates(X, templates) Featurizer(X) if X: X[0]['F'].append('__BOS__') # BOS feature X[-1]['F'].append('__EOS__') # EOS feature if __name__ == '__main__': DF = DictionaryFeatures("./lexicon") crfutils.main(FeatureExtractor, fields=fields, sep=separator)
def extract_features(inputfile, outputfile):
    """Extract features from ``inputfile`` into ``outputfile``.

    The input is opened for reading and the output for writing (text mode);
    both file objects go to ``crfutils.main`` as ``fi`` and ``fo`` along with
    the ``feature_extractor``, ``fields`` and ``separator`` names visible from
    this function's scope.

    Args:
        inputfile: Path of the file to read.
        outputfile: Path of the file to create or truncate.
    """
    # A with-statement replaces the trailing close() calls so that neither
    # handle is leaked when crfutils.main or the second open() raises.
    with open(inputfile, 'r') as fi, open(outputfile, 'w') as fo:
        crfutils.main(feature_extractor, fields=fields, sep=separator, fi=fi, fo=fo)
def main(argv):
    """Run ``crfutils.main`` with ``feature_extractor`` on ``'w y'`` fields.

    The field separator is a tab character. ``argv`` is accepted but not read
    by this function, and the result of ``crfutils.main`` is not returned.
    """
    options = {'fields': 'w y', 'sep': '\t'}
    crfutils.main(feature_extractor, **options)
def Featurizer(X):
    """Append dictionary and orthographic features to every item of ``X``.

    The ``'w'`` values of all items are collected first. For each position,
    the result of ``DF.GetDictFeatures`` (given that list and the position) is
    concatenated with the result of ``GetOrthographicFeatures`` for the item's
    own ``'w'`` value. Every resulting feature is formatted with ``'%s'`` and
    appended to the item's ``'F'`` list. A falsy ``X`` is left untouched.
    """
    if not X:
        return
    words = [item['w'] for item in X]
    for position, item in enumerate(X):
        word = item['w']
        combined = DF.GetDictFeatures(words, position) + GetOrthographicFeatures(word)
        for feature in combined:
            item['F'].append('%s' % feature)


def FeatureExtractor(X):
    """Expand ``templates`` over ``X``, run ``Featurizer``, mark boundaries.

    When ``X`` is non-empty, ``'__BOS__'`` is appended to the ``'F'`` list of
    the first item and ``'__EOS__'`` to the ``'F'`` list of the last item.
    """
    crfutils.apply_templates(X, templates)
    Featurizer(X)
    if not X:
        return
    # Boundary markers are added by hand, first item then last item.
    for index, marker in ((0, '__BOS__'), (-1, '__EOS__')):
        X[index]['F'].append(marker)


if __name__ == '__main__':
    DF = DictionaryFeatures("./lexicon")
    crfutils.main(FeatureExtractor, fields=fields, sep=separator)
(('w', -2), ), (('w', -1), ), (('w', 0), ), (('w', 1), ), (('w', 2), ), (('w', -1), ('w', 0)), (('w', 0), ('w', 1)), (('pos', -2), ), (('pos', -1), ), (('pos', 0), ), (('pos', 1), ), (('pos', 2), ), (('pos', -2), ('pos', -1)), (('pos', -1), ('pos', 0)), (('pos', 0), ('pos', 1)), (('pos', 1), ('pos', 2)), (('pos', -2), ('pos', -1), ('pos', 0)), (('pos', -1), ('pos', 0), ('pos', 1)), (('pos', 0), ('pos', 1), ('pos', 2)), ) def feature_extractor(X): template.apply(X, templates) if X: X[0]['F'].append('__BOS__') X[-1]['F'].append('__EOS__') if __name__ == '__main__': crfutils.main(feature_extractor, fields='w pos y', sep=' ')
(('pos', 2), ), (('pos', -2), ('pos', -1)), (('pos', -1), ('pos', 0)), (('pos', 0), ('pos', 1)), (('pos', 1), ('pos', 2)), (('pos', -2), ('pos', -1), ('pos', 0)), (('pos', -1), ('pos', 0), ('pos', 1)), (('pos', 0), ('pos', 1), ('pos', 2)), ) import re templates = ( (('w', 0, re.compile(r'^\d{2}$')), ('w', 1, re.compile(r'年'))), (('w', -1, re.compile(r'^\d{2}$')), ('w', 0, re.compile(r'年'))), ) import crfutils def feature_extractor(X): # Apply attribute templates to obtain features (in fact, attributes) crfutils.apply_templates(X, templates) #if X: ## Append BOS and EOS features manually # X[0]['F'].append('__BOS__') # BOS feature # X[-1]['F'].append('__EOS__') # EOS feature if __name__ == '__main__': import sys input_file = sys.argv[1] crfutils.main(feature_extractor, input_file=input_file, fields=fields, sep=separator)