def createDatasetMetaFeaturesInstances(self, dataset: Dataset, includeValueBased: bool):
    """Generate and persist the meta-feature instance files for a dataset.

    If candidate-attribute files already exist for the dataset, the work is
    skipped entirely. Otherwise the dataset directory is created, the
    attribute-generation process is run, and the per-metadata-type ARFF files
    are appended.

    :param dataset: the dataset whose meta-feature instances should be created
    :param includeValueBased: when True, also run the (time-consuming)
        value-based background-dataset feature generation; when False, generate
        only dataset-based and OA-based features (used for the pre-ranker model)
    """
    directoryForDataset = Properties.DatasetInstancesFilesLocation + dataset.name
    if os.path.isdir(directoryForDataset):
        # os.walk yields (dirpath, dirnames, filenames); we only need to know
        # whether any files were already produced for this dataset.
        _, _, filenames = next(os.walk(directoryForDataset))
        if filenames:
            Logger.Info('Candidate attributes for ' + dataset.name + ' were already calculated')
            return
    try:
        os.mkdir(directoryForDataset)
    except OSError as ex:
        # The directory may legitimately already exist (the isdir check above
        # is racy); any error other than EEXIST is fatal.
        if ex.errno != errno.EEXIST:
            Logger.Warn(f'createDatasetMetaFeaturesInstances -> Error creating directory {directoryForDataset}\nError: {ex}')
            raise
    if includeValueBased:
        # This is the line that activates the (time consuming) background
        # datasets feature generation process.
        self.generateTrainingSetDatasetAttributes(dataset)
        metadataTypes = [self.DATASET_BASED, self.OA_BASED, self.VALUES_BASED]
    else:
        # Pre-ranker model: the value-based features are skipped.
        self.generateTrainingSetDatasetAttributesWithoutValues(dataset)
        metadataTypes = [self.DATASET_BASED, self.OA_BASED]
    self.appendARFFFilesPerMetadataTypeForDataset(directoryForDataset, metadataTypes)
def produceScore(self, analyzedDatasets: Dataset, currentScore: ClassificationResults, completeDataset: Dataset, oa: OperatorAssignment, candidateAttribute) -> float:
    """Score a candidate attribute using the pre-trained meta-learning classifier.

    Builds the meta-feature vector for the candidate (the cached dataset
    attributes, the OA-based attributes, the classifier identity, and a
    placeholder class attribute), evaluates the stored classifier on it, and
    returns the predicted probability of the dataset's minority class.

    :return: the minority-class probability, or -1 on any failure (best-effort
        contract: errors are logged, not propagated)
    """
    try:
        mlam = MLAttributeManager()
        if self.classifier is None:
            Logger.Error("Classifier is not initialized")
            raise Exception("Classifier is not initialized")

        # We need to generate the features for this candidate attribute and
        # then run the (previously) calculated classification model.
        oaba = OperatorAssignmentBasedAttributes()
        # NOTE(review): the OA-based meta-feature generation is commented out,
        # so oaAttributes is always empty and the loop below is a no-op —
        # confirm whether
        # oaba.getOperatorAssignmentBasedMetaFeatures(analyzedDatasets, oa)
        # should be restored here.
        oaAttributes: dict = {}

        candidateAttributes = dict(self.datasetAttributes)
        for attributeInfo in oaAttributes.values():
            candidateAttributes[len(candidateAttributes)] = attributeInfo

        # We need to add the type of the classifier we're using.
        classifierAttribute = AttributeInfo("Classifier", outputType.Discrete, Properties.classifier, len(Properties.classifiersForMLAttributesGeneration.split(",")))
        candidateAttributes[len(candidateAttributes)] = classifierAttribute

        # In order to have attributes of the same set size, we need to add the
        # class attribute. We don't know the true value, so we set it to negative.
        classAttribute = AttributeInfo("classAttribute", outputType.Discrete, 0, 2)
        candidateAttributes[len(candidateAttributes)] = classAttribute

        # Build the values matrix and evaluate; we have a single prediction,
        # so it's easy to process.
        testInstances = mlam.generateValuesMatrix(candidateAttributes)
        evaluationInfo = self.classifier.evaluateClassifier(testInstances)
        prediction = evaluationInfo.predictions[0]
        ci = ClassificationItem(prediction.actual(), prediction.distribution())
        return ci.getProbabilities()[analyzedDatasets.getMinorityClassIndex()]
    except Exception as ex:
        # Best-effort: log the failing operator assignment and signal failure
        # with a sentinel score instead of raising.
        Logger.Warn("oa working on " + oa.getName())
        Logger.Error("FilterPreRankerEvaluator.produceScore -> Error in ML score generation : " + str(ex))
        return -1