Пример #1
0
    def createDatasetMetaFeaturesInstances(self, dataset: Dataset, includeValueBased: bool):
        """Generate the meta-feature instance files for ``dataset`` if they are missing.

        The output directory is ``Properties.DatasetInstancesFilesLocation``
        concatenated with ``dataset.name``. If that directory already exists and
        directly contains at least one file, the work is considered done and the
        method returns without regenerating anything.

        Args:
            dataset: The dataset whose meta-features are generated; only its
                ``name`` is read here, the object itself is forwarded to the
                generation helpers.
            includeValueBased: When true, run the full (time consuming)
                generation and include the value-based metadata type; when
                false, run the variant without values (used for the pre-ranker
                model).

        Raises:
            OSError: If the output directory cannot be created for any reason
                other than "already exists".
        """
        directoryForDataset = Properties.DatasetInstancesFilesLocation + dataset.name

        if os.path.isdir(directoryForDataset):
            # os.walk suppresses listing errors by default and then yields
            # nothing; the default tuple keeps next() from raising
            # StopIteration in that case and treats the directory as empty.
            _, _, filenames = next(os.walk(directoryForDataset), (None, [], []))
            if filenames:
                Logger.Info('Candidate attributes for ' + dataset.name + ' were already calculated')
                return

        try:
            # makedirs also creates missing parent directories, so a fresh
            # output location does not have to be prepared by hand.
            os.makedirs(directoryForDataset, exist_ok=True)
        except OSError as ex:
            # "Already exists" is tolerated (as before); anything else is
            # reported and propagated to the caller.
            if ex.errno != errno.EEXIST:
                Logger.Warn(f'createDatasetMetaFeaturesInstances -> Error creating directory {directoryForDataset}\nError: {ex}')
                raise

        if includeValueBased:
            # This is the line that activates the (time consuming) background
            # datasets feature generation process.
            self.generateTrainingSetDatasetAttributes(dataset)
            metadataTypes = [self.DATASET_BASED, self.OA_BASED, self.VALUES_BASED]
        else:
            # For the pre-ranker model: skip the value-based features.
            self.generateTrainingSetDatasetAttributesWithoutValues(dataset)
            metadataTypes = [self.DATASET_BASED, self.OA_BASED]

        self.appendARFFFilesPerMetadataTypeForDataset(directoryForDataset, metadataTypes)
    def produceScore(self, analyzedDatasets: Dataset,
                     currentScore: ClassificationResults,
                     completeDataset: Dataset, oa: OperatorAssignment,
                     candidateAttribute) -> float:
        """Score a candidate attribute with the previously built classifier.

        Builds a single-row attribute set from ``self.datasetAttributes`` plus a
        classifier-type attribute and a placeholder class attribute, evaluates
        it with ``self.classifier`` and returns the predicted probability at
        the index given by ``analyzedDatasets.getMinorityClassIndex()``.

        Args:
            analyzedDatasets: Dataset supplying the minority class index used
                to pick the returned probability.
            currentScore: Not used by this method.
            completeDataset: Not used by this method.
            oa: The operator assignment being scored; only its name is read,
                and only for logging when scoring fails.
            candidateAttribute: Not used by this method.

        Returns:
            The probability taken from the first prediction, or ``-1`` if any
            exception occurs (including an uninitialized classifier). Failures
            are logged rather than propagated.
        """
        try:
            mlam = MLAttributeManager()
            if self.classifier is None:
                Logger.Error("Classifier is not initialized")
                # Raised inside the try on purpose: the handler below logs it
                # and turns it into the -1 sentinel, as callers expect.
                raise Exception("Classifier is not initialized")

            # We need to generate the features for this candidate attribute and
            # then run the (previously) calculated classification model.
            # The operator-assignment based meta-features are currently switched
            # off, so this stays empty; to re-enable, fill it from
            # OperatorAssignmentBasedAttributes().getOperatorAssignmentBasedMetaFeatures(analyzedDatasets, oa).
            oaAttributes: dict = {}

            # Work on a shallow copy so the shared dataset attributes are not mutated.
            candidateAttributes = dict(self.datasetAttributes)
            # New entries are keyed by the current size of the mapping
            # (presumably the keys are consecutive integers — TODO confirm).
            for attributeInfo in oaAttributes.values():
                candidateAttributes[len(candidateAttributes)] = attributeInfo

            # We need to add the type of the classifier we're using.
            classifierAttribute = AttributeInfo(
                "Classifier", outputType.Discrete, Properties.classifier,
                len(Properties.classifiersForMLAttributesGeneration.split(",")))
            candidateAttributes[len(candidateAttributes)] = classifierAttribute

            # In order to have attributes of the same set size, we need to add
            # the class attribute. We don't know the true value, so we set it
            # to negative.
            classAttribute = AttributeInfo("classAttribute",
                                           outputType.Discrete, 0, 2)
            candidateAttributes[len(candidateAttributes)] = classAttribute

            testInstances = mlam.generateValuesMatrix(candidateAttributes)

            # We have a single prediction, so it's easy to process.
            evaluationInfo = self.classifier.evaluateClassifier(testInstances)
            prediction = evaluationInfo.predictions[0]
            ci = ClassificationItem(prediction.actual(),
                                    prediction.distribution())
            return ci.getProbabilities()[
                analyzedDatasets.getMinorityClassIndex()]

        except Exception as ex:
            Logger.Warn("oa working on " + oa.getName())

            Logger.Error(
                "FilterPreRankerEvaluator.produceScore -> Error in ML score generation : "
                + str(ex))
            # Sentinel value signalling that no score could be produced.
            return -1