def __init__(self, train=False, cv=True, folds=5,
             trained_model_name="trained_model.pkl",
             feat_index_name="feature_index.pkl",
             stored_tagset="tagset.pkl"):
    """Initialize the statement classifier for training or prediction.

    Args:
        train: When true, nothing is loaded from disk and the instance is
            only configured for training. When false, a previously stored
            model, feature index and tagset are unpickled.
        cv: Stored as ``self.cv``; in training mode a notice is printed
            when it is true.
        folds: Stored as ``self.folds``.
        trained_model_name: Path of the pickled model.
        feat_index_name: Path of the pickled feature index.
        stored_tagset: Path of the pickled tagset.

    Raises:
        IOError/OSError: In prediction mode, if any stored file cannot be
            opened.

    Note:
        ``self.classifier`` and ``self.tagset`` are only assigned in
        prediction mode (``train`` false).
    """
    self.trained_model_name = trained_model_name
    self.feat_index_name = feat_index_name
    self.stored_tagset = stored_tagset
    self.cv = cv
    self.folds = folds
    self.fp = FeatureProcessing()

    if train:
        # sys.stderr.write emits the same bytes as the Python 2 print
        # statement while also working unchanged on Python 3.
        sys.stderr.write("Statement classifier initialized for training.\n")
        if self.cv:
            sys.stderr.write("Cross-validation will be done\n")
        return

    # NOTE: pickle can execute arbitrary code while loading; these files
    # must come from a trusted source.
    # Context managers close each handle deterministically instead of
    # relying on garbage collection.
    with open(self.trained_model_name, "rb") as model_file:
        self.classifier = pickle.load(model_file)
    with open(self.feat_index_name, "rb") as index_file:
        feat_index = pickle.load(index_file)
    with open(self.stored_tagset, "rb") as tagset_file:
        self.tagset = pickle.load(tagset_file)
    self.fp.feat_index = feat_index
    sys.stderr.write(
        "Stored model loaded. Statement classifier initialized for prediction.\n"
    )
def __init__(self, do_train=False, trained_model_name="passage_crf_model",
             algorithm="crf"):
    """Initialize the passage tagger for training or for tagging.

    Args:
        do_train: When true, a trainer object is created; otherwise a
            tagger is created or loaded.
        trained_model_name: Path of the stored model. In this constructor
            it is only read on the non-"crf" tagging path, where it is
            unpickled.
        algorithm: ``"crf"`` selects the ``Trainer``/``Tagger`` pair; any
            other value selects the ``ChainCRF`` + ``FrankWolfeSSVM`` path.

    Raises:
        IOError/OSError: On the non-"crf" tagging path, if the model or
            one of the ``ssvm_*_index.pkl`` files cannot be opened.

    Note:
        Which attributes exist depends on the mode: ``self.trainer`` when
        training, ``self.tagger`` otherwise. On the non-"crf" path,
        training also sets empty ``self.feat_index`` / ``self.label_index``
        dicts, while tagging sets ``self.feat_index`` and
        ``self.rev_label_index`` (index -> label).
    """
    self.trained_model_name = trained_model_name
    self.fp = FeatureProcessing()
    self.do_train = do_train
    self.algorithm = algorithm

    if algorithm == "crf":
        if do_train:
            self.trainer = Trainer()
        else:
            self.tagger = Tagger()
    else:
        if do_train:
            model = ChainCRF()
            self.trainer = FrankWolfeSSVM(model=model)
            # Both indices start empty in training mode.
            self.feat_index = {}
            self.label_index = {}
        else:
            # NOTE: pickle can execute arbitrary code while loading; these
            # files must come from a trusted source.
            # Context managers close each handle deterministically instead
            # of relying on garbage collection.
            with open(self.trained_model_name, "rb") as model_file:
                self.tagger = pickle.load(model_file)
            with open("ssvm_feat_index.pkl", "rb") as feat_file:
                self.feat_index = pickle.load(feat_file)
            with open("ssvm_label_index.pkl", "rb") as label_file:
                label_index = pickle.load(label_file)
            # Invert label -> index so predicted indices map back to labels.
            self.rev_label_index = {i: x for x, i in label_index.items()}
# Featurize a tab-separated training file and pickle the result.
#
# Usage: <script> TRAINFILE OUTFILE
#   TRAINFILE: UTF-8 text, one example per line as "label<TAB>clause".
#   OUTFILE:   receives a pickled (features, labels, feature_index) tuple.
import sys
import codecs
import pickle

from features import FeatureProcessing

trainfile_name = sys.argv[1]
outfile_name = sys.argv[2]

fp = FeatureProcessing()

# Close the input deterministically rather than leaking the handle.
with codecs.open(trainfile_name, "r", "utf-8") as trainfile:
    train_data = [
        tuple(line.strip().split("\t"))
        for line in trainfile
        # A blank line would yield a 1-tuple and make the two-way unpack
        # of zip(*train_data) below fail, so skip it.
        if line.strip()
    ]

# Transpose rows of (label, clause) into two parallel sequences.
train_labels, train_clauses = zip(*train_data)

# sys.stderr.write emits the same bytes as the Python 2 print statement
# while also working unchanged on Python 3.
sys.stderr.write("Indexing features..\n")
fp.index_data(train_clauses)
feats = [fp.featurize(clause) for clause in train_clauses]

# The with-block guarantees the pickle is flushed and closed on exit.
with open(outfile_name, "wb") as outfile:
    pickle.dump((feats, train_labels, fp.feat_index), outfile)