Example #1
    def test_time_token_contexts_to_features(self):
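        # compare wall-clock time of single-worker vs. multi-worker feature extraction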
        context_creator_list = self.config['context_creators']
        context_creators = import_utils.build_objects(context_creator_list)
        interesting_tokens = {'the', 'it', 'a'}

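        # map each interesting token to the contexts produced by the context creators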
        token_contexts = experiment_utils.map_contexts(interesting_tokens, context_creators)

        feature_extractor_list = self.config['feature_extractors'][:1]
        feature_extractors = import_utils.build_objects(feature_extractor_list)

        start = time.time()
        mapped_contexts = experiment_utils.token_contexts_to_features(token_contexts, feature_extractors)
        finish = time.time() - start
        print "Single: ", finish

        start = time.time()
        mapped_contexts = experiment_utils.token_contexts_to_features(token_contexts, feature_extractors, workers=10)
        finish = time.time() - start
        print "Multiple: ", finish
Example #2
    def test_token_contexts_to_features(self):
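        # parallel feature extraction should yield features for every token context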
        context_creator_list = self.config['context_creators']
        context_creators = import_utils.build_objects(context_creator_list)
        interesting_tokens = {'the', 'it', 'a'}

        token_contexts = experiment_utils.map_contexts(interesting_tokens, context_creators)

        feature_extractor_list = self.config['feature_extractors'][:1]
        feature_extractors = import_utils.build_objects(feature_extractor_list)

        workers = 8
        mapped_contexts = experiment_utils.token_contexts_to_features(token_contexts, feature_extractors, workers=workers)

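        # every token should be present, with one feature row per original context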
        self.assertEqual(set(mapped_contexts.keys()), set(token_contexts.keys()))
        for tok, feature_vecs in mapped_contexts.items():
            self.assertEqual(feature_vecs.shape[0], len(token_contexts[tok]))
Example #3
    def test_token_classifiers(self):
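        # end-to-end check: contexts -> features -> binarization -> tags -> per-token classifiers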
        interesting_tokens = {'the', 'it', 'a'}
        context_creators = import_utils.build_objects(self.config['context_creators'])
        token_contexts = experiment_utils.map_contexts(interesting_tokens, context_creators)

        feature_extractors = import_utils.build_objects(self.config['feature_extractors'])
        token_context_features = experiment_utils.token_contexts_to_features(token_contexts, feature_extractors)
        # fit binarizers on the full feature set, then binarize each token's features
        binarizers = experiment_utils.fit_binarizers(experiment_utils.flatten(token_context_features.values()))
        token_context_features = {k: [experiment_utils.binarize(v, binarizers) for v in val]
                                  for k, val in token_context_features.items()}

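        # collect the tags attached to each token's contexts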
        token_context_tags = experiment_utils.tags_from_contexts(token_contexts)

        # look up the classifier class named in the config, then train one classifier per token
        classifier_type = experiment_utils.import_class(self.config['learning']['classifier']['module'])

        classifier_map = learning_utils.token_classifiers(token_context_features, token_context_tags, classifier_type)
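        # every tagged token should get back a trained classifier exposing predict()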
        self.assertEqual(set(token_context_tags.keys()), set(classifier_map.keys()))
        for tok, classifier in classifier_map.items():
            self.assertTrue(hasattr(classifier, 'predict'))