def _localReadMoreXML(self, xmlNode): """ Function to read the portion of the xml input that belongs to this specialized class and initialize some stuff based on the inputs @ In, xmlNode, xml.etree.ElementTree Element Objects, the xml element node that will be checked against the available options specific to this Sampler @ Out, None """ #paramInput = CrossValidation.getInputSpecification()() #paramInput.parseNode(xmlNode) self.initializationOptionDict = {} for child in xmlNode: if child.tag == 'SciKitLearn': self.initializationOptionDict[ child.tag] = self._localInputAndCheck(child) self.CVEngine = CrossValidations.returnInstance( child.tag, self, **self.initializationOptionDict[child.tag]) elif child.tag == 'Metric': if 'type' not in child.attrib.keys( ) or 'class' not in child.attrib.keys(): self.raiseAnError( IOError, 'Tag Metric must have attributes "class" and "type"') else: metricName = child.text.strip() self.metricsDict[metricName] = None else: self.raiseAnError(IOError, "Unknown xml node ", child.tag, " is provided for metric system")
def run(self, inputIn):
  """
    This method executes the postprocessor action.
    @ In, inputIn, list, list of objects, i.e. the object contained the data to process,
      the instance of model.
    @ Out, outputDict (or scoreDict), dict, Dictionary containing the results
      (per-fold metric arrays, or a single aggregated score per variable when
      self.cvScore is set)
  """
  inputDict, cvEstimator = self.inputToInternal(inputIn, full=True)
  if cvEstimator.subType in self.invalidRom:
    self.raiseAnError(IOError, cvEstimator.subType,
                      " can not be retrained, thus can not be used in Cross Validation post-processor ",
                      self.name)
  if self.dynamic:
    self.raiseAnError(IOError, "Not implemented yet")
  # deep copy: value.pop below mutates the option dict, the original must stay intact
  initDict = copy.deepcopy(self.initializationOptionDict)
  cvEngine = None
  groups = None
  for key, value in initDict.items():
    if key == "SciKitLearn":
      groups = value.pop("labels", None)
      cvEngine = CrossValidations.returnInstance(key, self, **value)
      break
  if cvEngine is None:
    self.raiseAnError(IOError, "No cross validation engine is provided!")
  outputDict = {}
  # In SciKit-Learn (version > 0.18), module model_selection is used to perform cross validation.
  # A wrapper in RAVEN is created, and the method .split is replaced with generateTrainTestIndices.
  # In the old version, 'labels' is used for the label-related cross validation. In the new versions
  # both keywords 'y' and 'groups' can be used to specify the labels. The keyword 'y' is mainly used
  # by SciKit-Learn supervised learning problems, and 'groups' is an additional option to specify the
  # group labels used while splitting the dataset into train/test set. For our purpose, only one
  # label option is needed, so the same labels are passed as both 'y' and 'groups'.
  for trainIndex, testIndex in cvEngine.generateTrainTestIndices(
          list(inputDict.values())[0], y=groups, groups=groups):
    trainDict, testDict = self.__generateTrainTestInputs(inputDict, trainIndex, testIndex)
    ## Train the rom
    cvEstimator.train(trainDict)
    ## evaluate the rom
    outputEvaluation = cvEstimator.evaluate(testDict)
    ## Compute the distance between ROM and given data using Metric system
    for targetName, targetValue in outputEvaluation.items():
      for metricInstance in self.metricsDict.values():
        metricValue = metricInstance.evaluate(targetValue, testDict[targetName])
        # guard clause: a metric without a metricType cannot be validated
        if not hasattr(metricInstance, 'metricType'):
          self.raiseAnError(IOError, "The metric: ", metricInstance.name,
                            " can not be used, the accepted metric types are: ", str(self.validMetrics))
        if metricInstance.metricType[1] not in self.validMetrics:
          self.raiseAnError(IOError, "The metric type: ", metricInstance.metricType[1],
                            " can not be used, the accepted metric types are: ", ",".join(self.validMetrics))
        varName = 'cv' + '_' + metricInstance.name + '_' + targetName
        if varName not in outputDict:
          outputDict[varName] = np.array([])
        outputDict[varName] = np.append(outputDict[varName], metricValue)
  # no aggregation requested: return the raw per-fold metric values
  if not self.cvScore:
    return outputDict
  # aggregate the per-fold values into a single score per variable
  scoreDict = {}
  aggregator = self.cvScore.lower()
  for varName, metricValues in outputDict.items():
    values = np.atleast_1d(metricValues)
    if aggregator == 'maximum':
      scoreDict[varName] = np.atleast_1d(np.amax(values))
    elif aggregator == 'median':
      scoreDict[varName] = np.atleast_1d(np.median(values))
    elif aggregator == 'average':
      scoreDict[varName] = np.atleast_1d(np.mean(values))
    # NOTE(review): an unrecognized cvScore silently drops varName from the result —
    # presumably cvScore is validated at input-reading time; verify upstream.
  return scoreDict
def run(self, inputIn):
  """
    This method executes the postprocessor action.
    @ In, inputIn, list, list of objects, i.e. the object contained the data to process,
      the instance of model.
    @ Out, outputDict (or scoreDict), dict, Dictionary containing the results
      (per-fold metric arrays, or a single aggregated score per variable when
      self.cvScore is set)
  """
  inputDict, cvEstimator = self.inputToInternal(inputIn, full=True)
  if cvEstimator.subType in self.invalidRom:
    self.raiseAnError(IOError, cvEstimator.subType,
                      " can not be retrained, thus can not be used in Cross Validation post-processor ",
                      self.name)
  if self.dynamic:
    self.raiseAnError(IOError, "Not implemented yet")
  # deep copy: the option dict is augmented below, the original must stay intact
  initDict = copy.deepcopy(self.initializationOptionDict)
  cvEngine = None
  for key, value in initDict.items():
    if key == "SciKitLearn":
      if value['SKLtype'] in self.CVList:
        # BUGFIX: dict.values() is a non-subscriptable view in Python 3;
        # materialize it before indexing (was: inputDict.values()[0])
        dataSize = np.asarray(list(inputDict.values())[0]).size
        value['n'] = dataSize
      cvEngine = CrossValidations.returnInstance(key, self, **value)
      break
  if cvEngine is None:
    self.raiseAnError(IOError, "No cross validation engine is provided!")
  outputDict = {}
  for trainIndex, testIndex in cvEngine.generateTrainTestIndices():
    trainDict, testDict = self.__generateTrainTestInputs(inputDict, trainIndex, testIndex)
    ## Train the rom
    cvEstimator.train(trainDict)
    ## evaluate the rom
    outputEvaluation = cvEstimator.evaluate(testDict)
    ## Compute the distance between ROM and given data using Metric system
    for targetName, targetValue in outputEvaluation.items():
      for metricInstance in self.metricsDict.values():
        metricValue = metricInstance.evaluate(targetValue, testDict[targetName])
        # guard clause: a metric without a metricType cannot be validated
        if not hasattr(metricInstance, 'metricType'):
          self.raiseAnError(IOError, "The metric: ", metricInstance.name,
                            " can not be used, the accepted metric types are: ", str(self.validMetrics))
        if metricInstance.metricType[1] not in self.validMetrics:
          self.raiseAnError(IOError, "The metric type: ", metricInstance.metricType[1],
                            " can not be used, the accepted metric types are: ", ",".join(self.validMetrics))
        varName = 'cv' + '_' + metricInstance.name + '_' + targetName
        if varName not in outputDict:
          outputDict[varName] = np.array([])
        outputDict[varName] = np.append(outputDict[varName], metricValue)
  # no aggregation requested: return the raw per-fold metric values
  if not self.cvScore:
    return outputDict
  # aggregate the per-fold values into a single score per variable
  scoreDict = {}
  aggregator = self.cvScore.lower()
  for varName, metricValues in outputDict.items():
    values = np.atleast_1d(metricValues)
    if aggregator == 'maximum':
      scoreDict[varName] = np.atleast_1d(np.amax(values))
    elif aggregator == 'median':
      scoreDict[varName] = np.atleast_1d(np.median(values))
    elif aggregator == 'average':
      scoreDict[varName] = np.atleast_1d(np.mean(values))
    # NOTE(review): an unrecognized cvScore silently drops varName from the result —
    # presumably cvScore is validated at input-reading time; verify upstream.
  return scoreDict