def _handleInput(self, paramInput): """ Function to handle the parsed paramInput for this class. @ In, paramInput, ParameterInput, the already parsed input. @ Out, None """ ## By default, we want to name the 'labels' by the name of this ## postprocessor, but that name is not available before processing the XML ## At this point, we have that information self.initializationOptionDict = {} for child in paramInput.subparts: if child.getName() == 'KDD': if len(child.parameterValues) > 0: ## I'm not sure what this thing is used for, but it seems to make more ## sense to only put data that is not otherwise handled rather than ## put all of the information and then to remove the ones we process. ## - dpm 6/8/16 self.initializationOptionDict[child.getName()] = {} for key,value in child.parameterValues.items(): if key == 'lib': self.type = value elif key == 'labelFeature': self.labelFeature = value else: self.initializationOptionDict[child.getName()][key] = value else: self.initializationOptionDict[child.getName()] = utils.tryParse(child.value) for childChild in child.subparts: if len(childChild.parameterValues) > 0 and not childChild.getName() == 'PreProcessor': self.initializationOptionDict[child.getName()][childChild.getName()] = dict(childChild.parameterValues) else: self.initializationOptionDict[child.getName()][childChild.getName()] = utils.tryParse(childChild.value) elif child.getName() == 'pivotParameter': self.pivotParameter = child.value if not hasattr(self, 'pivotParameter'): #TODO, if doing time dependent data mining that needs this, an error # should be thrown self.pivotParameter = None if self.type: #TODO unSurpervisedEngine needs to be able to handle both methods # without this if statement. 
if self.pivotParameter is not None: self.unSupervisedEngine = unSupervisedLearning.returnInstance("temporalSciKitLearn", self, **self.initializationOptionDict['KDD']) else: self.unSupervisedEngine = unSupervisedLearning.returnInstance(self.type, self, **self.initializationOptionDict['KDD']) else: self.raiseAnError(IOError, 'No Data Mining Algorithm is supplied!') ## If the user has not defined a label feature, then we will force it to be ## named by the PostProcessor name followed by: ## the word 'Labels' for clustering/GMM models; ## the word 'Dimension' + a numeric id for dimensionality reduction ## algorithms if self.labelFeature is None: if self.unSupervisedEngine.getDataMiningType() in ['cluster','mixture']: self.labelFeature = self.name+'Labels' elif self.unSupervisedEngine.getDataMiningType() in ['decomposition','manifold']: self.labelFeature = self.name+'Dimension'
def _localReadMoreXML(self, xmlNode):
    """
      Method that reads the portion of the xml input that belongs to this specialized class
      and initializes internal parameters: the 'metricType' node is stored in
      self.metricType, every other node is stored (parsed) in self.distParams.
      An error is raised through self.raiseAnError if the metric type is not among
      the names offered by pairwise.kernel_metrics(), pairwise.distance_metrics()
      or scores.
      @ In, xmlNode, xml.etree.Element, Xml element node
      @ Out, None
    """
    self.distParams = {}
    for child in xmlNode:
        if child.tag == 'metricType':
            self.metricType = child.text
        else:
            self.distParams[str(child.tag)] = utils.tryParse(child.text)

    # Each key collection is turned into a list before concatenating: on
    # Python 3 keys() returns a view object, which does not support '+'.
    availableMetrics = (list(pairwise.kernel_metrics().keys())
                        + list(pairwise.distance_metrics().keys())
                        + list(scores.keys()))
    if self.metricType not in availableMetrics:
        metricList = ', '.join(availableMetrics[:-1]) + ', or ' + availableMetrics[-1]
        self.raiseAnError(IOError, 'Metric SKL error: metricType ' + str(self.metricType) + ' is not available. Available metrics are: ' + metricList + '.')

    for key, value in self.distParams.items():
        try:
            newValue = ast.literal_eval(value)
            if isinstance(newValue, list):
                newValue = np.asarray(newValue)
        except (ValueError, TypeError, SyntaxError, MemoryError, RuntimeError):
            # Best effort only: a value that cannot be evaluated as a Python
            # literal (or converted to an array) is kept exactly as stored.
            # The exception list is explicit so that KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            continue
        # Only existing keys are reassigned, so the dictionary size does not
        # change while it is being iterated.
        self.distParams[key] = newValue
def tryStrParse(s):
    """
      Parses the argument only when its type is named 'str' or 'unicode';
      anything else is handed back untouched.
      @ In, s, string, possible string
      @ Out, s, string, original type, or possibly parsed string
    """
    typeName = type(s).__name__
    if typeName not in ('str', 'unicode'):
        return s
    return utils.tryParse(s)
def _localInputAndCheckParam(self, inputParam): """ Function to read the portion of the xml input @ In, inputParam, ParameterInput, the xml element node that will be checked against the available options specific to this Sampler @ Out, initDict, dict, dictionary contains the information about the given xml node """ initDict = {} for child in inputParam.subparts: if len(child.parameterValues) > 0: initDict[child.getName()] = dict(child.parameterValues) else: initDict[child.getName()] = utils.tryParse(child.value) return initDict
def _localInputAndCheck(self, xmlNode): """ Function to read the portion of the xml input @ In, xmlNode, xml.etree.ElementTree Element Objects, the xml element node that will be checked against the available options specific to this Sampler @ Out, initDict, dict, dictionary contains the information about the given xml node """ initDict = {} for child in xmlNode: if child.attrib: initDict[child.tag] = dict(child.attrib) else: initDict[child.tag] = utils.tryParse(child.text) return initDict
def _localReadMoreXML(self, xmlNode): """ Function that reads the portion of the xml input that belongs to this specialized class and initializes some elements based on the inputs got @ In, xmlNode, xml.etree.Element, Xml element node @ Out, None """ # paramInput = DataMining.getInputSpecification()() # paramInput.parseNode(xmlNode) ## By default, we want to name the 'labels' by the name of this ## postprocessor, but that name is not available before processing the XML ## At this point, we have that information self.initializationOptionDict = {} for child in xmlNode: if child.tag == 'KDD': if child.attrib: ## I'm not sure what this thing is used for, but it seems to make more ## sense to only put data that is not otherwise handled rather than ## put all of the information and then to remove the ones we process. ## - dpm 6/8/16 self.initializationOptionDict[child.tag] = {} for key,value in child.attrib.iteritems(): if key == 'lib': self.type = value elif key == 'labelFeature': self.labelFeature = value else: self.initializationOptionDict[child.tag][key] = value else: self.initializationOptionDict[child.tag] = utils.tryParse(child.text) for childChild in child: if childChild.attrib and not childChild.tag == 'PreProcessor': self.initializationOptionDict[child.tag][childChild.tag] = dict(childChild.attrib) else: self.initializationOptionDict[child.tag][childChild.tag] = utils.tryParse(childChild.text) elif child.tag == 'pivotParameter': self.pivotParameter = child.text if not hasattr(self, 'pivotParameter'): #TODO, if doing time dependent data mining that needs this, an error # should be thrown self.pivotParameter = None if self.type: #TODO unSurpervisedEngine needs to be able to handle both methods # without this if statement. 
if self.pivotParameter is not None: self.unSupervisedEngine = unSupervisedLearning.returnInstance("temporalSciKitLearn", self, **self.initializationOptionDict['KDD']) else: self.unSupervisedEngine = unSupervisedLearning.returnInstance(self.type, self, **self.initializationOptionDict['KDD']) else: self.raiseAnError(IOError, 'No Data Mining Algorithm is supplied!') ## If the user has not defined a label feature, then we will force it to be ## named by the PostProcessor name followed by: ## the word 'Labels' for clustering/GMM models; ## the word 'Dimension' + a numeric id for dimensionality reduction ## algorithms if self.labelFeature is None: if self.unSupervisedEngine.getDataMiningType() in ['cluster','mixture']: self.labelFeature = self.name+'Labels' elif self.unSupervisedEngine.getDataMiningType() in ['decomposition','manifold']: self.labelFeature = self.name+'Dimension'