def test_time_token_contexts_to_features(self): context_creator_list = self.config['context_creators'] context_creators = import_utils.build_objects(context_creator_list) interesting_tokens = set(['the','it', 'a']) token_contexts = experiment_utils.map_contexts(interesting_tokens, context_creators) feature_extractor_list = self.config['feature_extractors'][:1] feature_extractors = import_utils.build_objects(feature_extractor_list) start = time.time() mapped_contexts = experiment_utils.token_contexts_to_features(token_contexts, feature_extractors) finish = time.time() - start print "Single: ", finish start = time.time() mapped_contexts = experiment_utils.token_contexts_to_features(token_contexts, feature_extractors, workers=10) finish = time.time() - start print "Multiple: ", finish
def test_token_contexts_to_features(self):
    """Extract features in parallel and verify coverage and shapes.

    Checks that the output maps exactly the same tokens as the input
    contexts, and that each token's feature matrix has one row per
    context.
    """
    context_creator_list = self.config['context_creators']
    context_creators = import_utils.build_objects(context_creator_list)
    interesting_tokens = set(['the', 'it', 'a'])
    token_contexts = experiment_utils.map_contexts(interesting_tokens, context_creators)
    # Only the first extractor is needed to exercise the mapping logic.
    feature_extractor_list = self.config['feature_extractors'][:1]
    feature_extractors = import_utils.build_objects(feature_extractor_list)
    workers = 8
    # Bug fix: the worker count was hard-coded as 8 in the call while the
    # local `workers` variable sat unused; pass the variable instead.
    mapped_contexts = experiment_utils.token_contexts_to_features(token_contexts, feature_extractors, workers=workers)
    self.assertEqual(set(mapped_contexts.keys()), set(token_contexts.keys()))
    for tok, feature_vecs in mapped_contexts.items():
        # One feature row per context occurrence of the token.
        self.assertTrue(feature_vecs.shape[0] == len(token_contexts[tok]))
def test_token_classifiers(self):
    """End-to-end check: contexts -> binarized features -> per-token classifiers.

    Verifies that one classifier is trained per tagged token and that
    every trained classifier exposes a `predict` method.
    """
    tokens_of_interest = set(['the','it', 'a'])
    creators = import_utils.build_objects(self.config['context_creators'])
    contexts_by_token = experiment_utils.map_contexts(tokens_of_interest, creators)

    extractors = import_utils.build_objects(self.config['feature_extractors'])
    features_by_token = experiment_utils.token_contexts_to_features(contexts_by_token, extractors)

    # Fit binarizers over the flattened feature pool, then binarize every
    # feature vector for every token.
    fitted = experiment_utils.fit_binarizers(experiment_utils.flatten(features_by_token.values()))
    features_by_token = {
        tok: [experiment_utils.binarize(vec, fitted) for vec in vecs]
        for tok, vecs in features_by_token.items()
    }

    tags_by_token = experiment_utils.tags_from_contexts(contexts_by_token)

    # One classifier per token, using the class named in the config.
    clf_class = experiment_utils.import_class(self.config['learning']['classifier']['module'])
    classifiers = learning_utils.token_classifiers(features_by_token, tags_by_token, clf_class)

    self.assertEqual(set(tags_by_token.keys()), set(classifiers.keys()))
    for _, clf in classifiers.items():
        self.assertTrue(hasattr(clf, 'predict'))