Example no. 1
0
    def _localReadMoreXML(self, xmlNode):
        """
          Read the portion of the XML input that belongs to this specialized
          class: build the cross-validation engine from the 'SciKitLearn' node
          and register the requested metrics from the 'Metric' nodes.
          @ In, xmlNode, xml.etree.ElementTree Element Objects, the xml element node that will be checked against
            the available options specific to this Sampler
          @ Out, None
        """
        self.initializationOptionDict = {}

        for child in xmlNode:
            tag = child.tag
            if tag == 'SciKitLearn':
                # parse and store the engine options, then instantiate the engine
                engineOptions = self._localInputAndCheck(child)
                self.initializationOptionDict[tag] = engineOptions
                self.CVEngine = CrossValidations.returnInstance(
                    tag, self, **engineOptions)
            elif tag == 'Metric':
                attrs = child.attrib
                if 'class' in attrs and 'type' in attrs:
                    # register the metric by name; the actual metric instance
                    # is resolved and filled in later
                    self.metricsDict[child.text.strip()] = None
                else:
                    self.raiseAnError(
                        IOError,
                        'Tag Metric must have attributes "class" and "type"')
            else:
                self.raiseAnError(IOError, "Unknown xml node ", tag,
                                  " is provided for metric system")
Example no. 2
0
 def run(self, inputIn):
     """
       This method executes the postprocessor action: it trains and evaluates
       the ROM on every train/test split produced by the cross-validation
       engine and collects the configured metric values for each target.
       @ In, inputIn, list, list of objects, i.e. the object contained the data to process, the instance of model.
       @ Out, outputDict, dict, Dictionary containing the per-fold metric
         values, or the per-variable aggregated scores when self.cvScore is set
     """
     inputDict, cvEstimator = self.inputToInternal(inputIn, full=True)
     # ROMs that cannot be retrained cannot be cross-validated
     if cvEstimator.subType in self.invalidRom:
         self.raiseAnError(
             IOError, cvEstimator.subType,
             " can not be retrained, thus can not be used in Cross Validation post-processor ",
             self.name)
     if self.dynamic:
         self.raiseAnError(IOError, "Not implemented yet")
     # deep copy so popping 'labels' below does not mutate the stored options
     initDict = copy.deepcopy(self.initializationOptionDict)
     cvEngine = None
     groups = None
     for key, value in initDict.items():
         if key == "SciKitLearn":
             groups = value.pop("labels", None)
             cvEngine = CrossValidations.returnInstance(key, self, **value)
             break
     if cvEngine is None:
         self.raiseAnError(IOError,
                           "No cross validation engine is provided!")
     outputDict = {}
     # In SciKit-Learn (version > 0.18), module model_selection is used to perform cross validation
     # A wrapper in RAVEN is created, and the method .split is replaced with generateTrainTestIndices
     # In the old version, 'labels' is used for the label-related cross validation. In the new versions
     # Both keywords 'y' and 'groups' can be used to specify the labels. The keyword 'y' is mainly used by
     # SciKit-Learn supervised learning problems, and 'groups' become additional option to specify the group
     # labels that can be used while splitting the dataset into train/test set. For our purpose, only one
     # label option is needed. ~ wangc
     for trainIndex, testIndex in cvEngine.generateTrainTestIndices(
             list(inputDict.values())[0], y=groups, groups=groups):
         trainDict, testDict = self.__generateTrainTestInputs(
             inputDict, trainIndex, testIndex)
         ## Train the rom on the training fold
         cvEstimator.train(trainDict)
         ## Evaluate the rom on the test fold
         outputEvaluation = cvEstimator.evaluate(testDict)
         ## Compute the distance between ROM and given data using Metric system
         for targetName, targetValue in outputEvaluation.items():
             for metricInstance in self.metricsDict.values():
                 metricValue = metricInstance.evaluate(
                     targetValue, testDict[targetName])
                 if hasattr(metricInstance, 'metricType'):
                     if metricInstance.metricType[1] not in self.validMetrics:
                         # NOTE: the original message used a backslash line
                         # continuation inside the literal, injecting a run of
                         # spaces into the user-facing text; fixed here
                         self.raiseAnError(
                             IOError, "The metric type: ",
                             metricInstance.metricType[1],
                             " can not be used, the accepted metric types are: ",
                             ",".join(self.validMetrics))
                 else:
                     self.raiseAnError(
                         IOError, "The metric: ", metricInstance.name,
                         " can not be used, the accepted metric types are: ",
                         str(self.validMetrics))
                 varName = 'cv' + '_' + metricInstance.name + '_' + targetName
                 if varName not in outputDict:
                     outputDict[varName] = np.array([])
                 outputDict[varName] = np.append(outputDict[varName],
                                                 metricValue)
     if not self.cvScore:
         return outputDict
     # Aggregate the per-fold values into a single score per variable. An
     # unrecognized score name silently yields an empty result, matching the
     # original behavior.
     reducers = {'maximum': np.amax, 'median': np.median, 'average': np.mean}
     reducer = reducers.get(self.cvScore.lower())
     scoreDict = {}
     for varName, metricValues in outputDict.items():
         if reducer is not None:
             scoreDict[varName] = np.atleast_1d(
                 reducer(np.atleast_1d(metricValues)))
     return scoreDict
Example no. 3
0
 def run(self, inputIn):
     """
       This method executes the postprocessor action: it trains and evaluates
       the ROM on every train/test split produced by the cross-validation
       engine and collects the configured metric values for each target.
       @ In, inputIn, list, list of objects, i.e. the object contained the data to process, the instance of model.
       @ Out, outputDict, dict, Dictionary containing the per-fold metric
         values, or the per-variable aggregated scores when self.cvScore is set
     """
     inputDict, cvEstimator = self.inputToInternal(inputIn, full=True)
     # ROMs that cannot be retrained cannot be cross-validated
     if cvEstimator.subType in self.invalidRom:
         self.raiseAnError(
             IOError, cvEstimator.subType,
             " can not be retrained, thus can not be used in Cross Validation post-processor ",
             self.name)
     if self.dynamic:
         self.raiseAnError(IOError, "Not implemented yet")
     # deep copy so mutating the options below does not touch the stored dict
     initDict = copy.deepcopy(self.initializationOptionDict)
     cvEngine = None
     for key, value in initDict.items():
         if key == "SciKitLearn":
             if value['SKLtype'] in self.CVList:
                 # BUGFIX: dict.values() is a non-subscriptable view in
                 # Python 3; materialize it before indexing
                 dataSize = np.asarray(list(inputDict.values())[0]).size
                 value['n'] = dataSize
             cvEngine = CrossValidations.returnInstance(key, self, **value)
             break
     if cvEngine is None:
         self.raiseAnError(IOError,
                           "No cross validation engine is provided!")
     outputDict = {}
     for trainIndex, testIndex in cvEngine.generateTrainTestIndices():
         trainDict, testDict = self.__generateTrainTestInputs(
             inputDict, trainIndex, testIndex)
         ## Train the rom on the training fold
         cvEstimator.train(trainDict)
         ## Evaluate the rom on the test fold
         outputEvaluation = cvEstimator.evaluate(testDict)
         ## Compute the distance between ROM and given data using Metric system
         for targetName, targetValue in outputEvaluation.items():
             for metricInstance in self.metricsDict.values():
                 metricValue = metricInstance.evaluate(
                     targetValue, testDict[targetName])
                 if hasattr(metricInstance, 'metricType'):
                     if metricInstance.metricType[1] not in self.validMetrics:
                         # NOTE: the original message used a backslash line
                         # continuation inside the literal, injecting a run of
                         # spaces into the user-facing text; fixed here
                         self.raiseAnError(
                             IOError, "The metric type: ",
                             metricInstance.metricType[1],
                             " can not be used, the accepted metric types are: ",
                             ",".join(self.validMetrics))
                 else:
                     self.raiseAnError(
                         IOError, "The metric: ", metricInstance.name,
                         " can not be used, the accepted metric types are: ",
                         str(self.validMetrics))
                 varName = 'cv' + '_' + metricInstance.name + '_' + targetName
                 if varName not in outputDict:
                     outputDict[varName] = np.array([])
                 outputDict[varName] = np.append(outputDict[varName],
                                                 metricValue)
     if not self.cvScore:
         return outputDict
     # Aggregate the per-fold values into a single score per variable. An
     # unrecognized score name silently yields an empty result, matching the
     # original behavior.
     reducers = {'maximum': np.amax, 'median': np.median, 'average': np.mean}
     reducer = reducers.get(self.cvScore.lower())
     scoreDict = {}
     for varName, metricValues in outputDict.items():
         if reducer is not None:
             scoreDict[varName] = np.atleast_1d(
                 reducer(np.atleast_1d(metricValues)))
     return scoreDict