def MEclassifier_model(self, compute_feature=False, is_train=True):
        '''
        Maximum entropy based classifier for Class Induction.

        :param compute_feature: when True, (re)compute and export the training
            features before loading them; otherwise reuse the existing file.
        :param is_train: when True, train a fresh classifier on the gold
            standard data; otherwise load the pickled model from disk.
        :return: the trained (or loaded) class-induction classifier.
        '''
        from oke.oak.FeatureFactory import FeatureFactory

        feature_factory = FeatureFactory()

        # Feature computation is optional: the exported JSON file can be reused
        # across runs to avoid recomputing features every time.
        if compute_feature:
            feature_factory.export_to_features('trainWithFeatures')
        else:
            print("skip computing features...")

        print('load features from \'trainWithFeatures.json\'... ')
        # Load the (feature, label) pairs from the exported JSON file.
        training_examples = feature_factory.readData('trainWithFeatures.json')
        train_set = [(example.features, example.label)
                     for example in training_examples]
        print("train set size", len(train_set))

        if is_train:
            return self.train(train_set)
        return self.load_classifier_model(
            classifier_pickled="me_class_inducer.m")
    def feature_extraction_for_prediction(self, graph_in_memory, context,
                                          context_sent):
        '''
        Feature extraction for the current context task in the prediction phase.

        :param graph_in_memory: RDF graph to aggregate context data from.
        :param context: the context (task) being predicted.
        :param context_sent: the sentence text associated with the context.
        :return: a (datums, context_data) tuple — the computed feature datums
            and the aggregated context data they were derived from.
        '''
        from oke.oak.nif2rdfProcessor import NIF2RDFProcessor
        from oke.oak.FeatureFactory import FeatureFactory

        rdf_processor = NIF2RDFProcessor()
        feature_factory = FeatureFactory()

        # Aggregate everything known about this context, then derive features.
        context_data = rdf_processor.aggregate_context_data(
            graph_in_memory, context, context_sent)
        feature_datums = feature_factory.compute_features(context_data)

        return (feature_datums, context_data)
    def batch_ontology_alignment(self):
        '''
        Ontology alignment for DOLCE+DnS Ultra Lite classes:
            query for dbpedia rdf types -> wordnet path similarity
            (is-a taxonomy) matching.

        Iterates over every gold-standard context, predicts a DUL class for the
        entity (step 1: reuse a DUL type already present in DBpedia; otherwise
        fall back to WordNet-based schema alignment), and prints precision
        against the labelled class types.
        '''
        from oke.oak.FeatureFactory import FeatureFactory
        from oke.oak.util import extract_type_label

        featureFactory = FeatureFactory()

        contextDict = self.dataProcessor.get_task_context(
            self.dataProcessor.graphData_goldstandards)
        entityset = set()
        dulclassset = set()
        without_duclass_num = 0

        # Only TP/FP are tracked: every context yields exactly one prediction,
        # so recall-style counters (TN/FN) are never incremented here.
        true_positive = 0
        false_positive = 0

        for context, context_sent in contextDict.items():
            context_data = featureFactory.dataProcessor.aggregate_context_data(
                featureFactory.dataProcessor.graphData_goldstandards, context,
                context_sent)

            entity_dbpedia_URI = context_data.entity.taIdentRef
            entityClasses = context_data.entity.isInstOfEntityClasses

            labelled_class_type = [
                entityClass.subClassOf for entityClass in entityClasses
            ]
            print('labelled class type:', labelled_class_type)

            entity_class_labels = {
                entityClass.anchorOf for entityClass in entityClasses
            }

            entity_rdftypes = featureFactory.dbpedia_query_rdftypes(
                entity_dbpedia_URI)

            # NOTE(review): result was never used by the original code either;
            # the call is kept in case it warms a cache / has side effects —
            # TODO confirm and drop if it is truly dead.
            featureFactory.dbpedia_query_deferencing_type(entity_class_labels)

            # step 1: Linked Open Data Discovering — check if there is a
            # dul/d0 class already associated with the entity and type
            # (by dereferenceable URI), e.g.
            # http://www.ontologydesignpatterns.org/ont/d0.owl#Location
            entity_rdf_type_labels = {
                extract_type_label(
                    featureFactory.get_URI_fragmentIdentifier(rdftype_uri))
                for rdftype_uri in entity_rdftypes
            }
            #TODO: entity_class_rdf_type_labels

            # Check whether a DUL class is already classified in DBpedia.
            dulClass = [
                rdftype for rdftype in entity_rdftypes
                if self.is_dul_class(rdftype)
            ]

            entityset.add(context_data.entity.taIdentRef)
            testset = set()
            if dulClass and dulClass[0] in featureFactory.dul_ontology_classes:
                dulclassset.add(dulClass[0])
                testset.add(dulClass[0])
            else:
                # step 2 fallback: align via WordNet path similarity over the
                # union of the entity's rdf:type labels and class anchor texts.
                without_duclass_num += 1
                print(
                    str(without_duclass_num) +
                    '> do not have dul class pre-classified in DBpedia')

                entity_synset = set()
                entity_synset.update(entity_rdf_type_labels)
                entity_synset.update(entity_class_labels)

                aligned_type = self.schema_alignment_by_wordnet(
                    entity_synset, featureFactory.dul_ontology_classes)
                print("string similarity aligned type for [",
                      entity_class_labels, '] is [', aligned_type, ']')
                dulclassset.add(aligned_type)
                testset.add(aligned_type)

            print("labelled class type:", labelled_class_type)
            print("predicted class type:", testset)
            # testset always holds exactly one prediction, so an unlabelled
            # context counts as a false positive; otherwise compare the single
            # prediction against the first labelled type.
            if (len(testset) > 0 and len(labelled_class_type) == 0):
                false_positive += 1
            elif (list(testset)[0] == list(labelled_class_type)[0]):
                true_positive += 1
            else:
                false_positive += 1

        # Guard against ZeroDivisionError when there were no contexts
        # (and hence no predictions) at all.
        predicted_total = true_positive + false_positive
        if predicted_total:
            print('precision:', true_positive / predicted_total)
        else:
            print('precision: undefined (no predictions)')
        print('entityset size:', len(entityset))
        print('existing dul class size:', len(dulclassset))