def _extract_discr(mpaths):
    """Extract discriminating features from the saved models.

    ``dconf`` and ``_top_n`` are not parameters: they are free names
    resolved from the surrounding scope.

    Parameters
    ----------
    mpaths : mapping
        Indexed with the keys ``'attach'`` and ``'label'``.  The two
        values are wrapped in a ``Team`` and passed through
        ``Team.fmap(joblib.load)`` (presumably loading each path as a
        model; confirm against ``Team.fmap``).

    Returns
    -------
    The result of ``discriminating_features(models, labels, vocab,
    _top_n)``, where ``labels`` and ``vocab`` are read from the first
    value in ``dconf.pack``.

    Raises
    ------
    IndexError
        If ``dconf.pack`` has no values.
    """
    # The result of .values() is a non-subscriptable view on Python 3, so
    # materialise it before taking the first datapack.  On Python 2 this
    # yields the same element as the former ``.values()[0]``.
    dpack0 = list(dconf.pack.values())[0]
    labels = dpack0.labels
    vocab = dpack0.vocab
    models = Team(attach=mpaths['attach'],
                  label=mpaths['label']).fmap(joblib.load)
    return discriminating_features(models, labels, vocab, _top_n)
def test_postlabel_parser(self):
    """Run each learner/decoder pairing through a PostlabelPipeline."""
    # one extra team on top of LEARNERS: a structured perceptron as the
    # attach learner, logistic regression as the label learner
    perceptron = StructuredPerceptron(MST_DECODER,
                                      n_iter=3,
                                      average=True,
                                      use_prob=False)
    labeller = SklearnLabelClassifier(LogisticRegression())
    candidates = LEARNERS + [Team(attach=perceptron, label=labeller)]

    # every (team, decoder) combination gets its own pipeline
    for team, decoder in itr.product(candidates, DECODERS):
        pipeline = PostlabelPipeline(learner_attach=team.attach,
                                     learner_label=team.label,
                                     decoder=decoder)
        self._test_parser(pipeline)
def test_intra_parsers(self):
    """Check every intra/inter parser wrapper on a dpack."""

    def _logreg_team():
        # a fresh attach/label pair, both backed by logistic regression
        return Team(attach=SklearnAttachClassifier(LogisticRegression()),
                    label=SklearnLabelClassifier(LogisticRegression()))

    intra_team = _logreg_team()
    inter_team = _logreg_team()

    # note: the pipeline types below are a somewhat arbitrary choice
    intra_pipeline = JointPipeline(learner_attach=intra_team.attach,
                                   learner_label=intra_team.label,
                                   decoder=MST_DECODER)
    inter_pipeline = PostlabelPipeline(learner_attach=inter_team.attach,
                                       learner_label=inter_team.label,
                                       decoder=MST_DECODER)

    # build all the parsers up front, then test them one at a time;
    # each wrapper gets its own IntraInterPair around the same pipelines
    parsers = []
    for wrap in (SentOnlyParser, SoftParser, HeadToHeadParser):
        pair = IntraInterPair(intra=intra_pipeline, inter=inter_pipeline)
        parsers.append(wrap(pair))

    for parser in parsers:
        self._test_parser(parser)
def test_postlabel_parser(self):
    """Run each learner team with each decoder as a PostlabelPipeline."""
    # extra team appended to LEARNERS: perceptron for attachment,
    # logistic regression for labelling
    perceptron_team = Team(
        attach=StructuredPerceptron(MST_DECODER, LOCAL_PERC_ARGS),
        label=SklearnLabelClassifier(LogisticRegression()))
    teams = LEARNERS + [perceptron_team]

    for team, decoder in itr.product(teams, DECODERS):
        pipeline = PostlabelPipeline(learner_attach=team.attach,
                                     learner_label=team.label,
                                     decoder=decoder)
        self._test_parser(pipeline)
os.makedirs(TMP_OUTPUT) # load the data mpack = load_multipack(PREFIX + '.edus', PREFIX + '.pairings', PREFIX + '.features.sparse', PREFIX + '.features.sparse.vocab', verbose=True) # divide the dataset into folds num_folds = min((10, len(mpack))) fold_dict = make_n_fold(mpack, num_folds, mk_rng()) # select a decoder and a learner team decoder = MstDecoder(root_strategy=MstRootStrategy.fake_root) learners = Team(attach=SklearnAttachClassifier(LogisticRegression()), label=SklearnLabelClassifier(LogisticRegression())) # put them together as a parser parser = JointPipeline(learner_attach=learners.attach, learner_label=learners.label, decoder=decoder) # run cross-fold evaluation scores = [] for fold in range(num_folds): print(">>> doing fold ", fold + 1, file=sys.stderr) print("training ... ", file=sys.stderr) # learn a model for the training data for this fold train_packs = select_training(mpack, fold_dict, fold).values() parser.fit(train_packs, [x.target for x in train_packs])