def _localNormalizeData(self, values, names, feat):
  """
    Caches the normalization factors (mean and standard deviation) for one feature.
    ROMs that should not normalize (e.g. GaussPolynomialRom) can override this
    method with a simple pass.
    @ In, values, list, list of feature values (from tdict)
    @ In, names, list, names of features (from tdict)
    @ In, feat, str, name of the single feature to normalize (from ROM)
    @ Out, None
  """
  # look up this feature's column of values, then store its (mu, sigma) pair
  featureValues = values[names.index(feat)]
  self.muAndSigmaFeatures[feat] = mathUtils.normalizationFactors(featureValues)
def _weightAndScaleClusters(self, features, featureGroups, clusterFeatures, weightingStrategy):
  """
    Applies normalization and weighting to cluster training features.
    @ In, features, list(str), ordered list of features
    @ In, featureGroups, dict, hierarchical structure of requested features
    @ In, clusterFeatures, dict, features mapped to arrays of values (per ROM)
    @ In, weightingStrategy, str, weighting strategy to use in ROM metrics
    @ Out, clusterFeatures, dict, weighted and scaled feature space (destructive on original dict)
  """
  # initialize structure: one weight per feature, filled in during the loop below
  weights = np.zeros(len(features))
  for f, feature in enumerate(features):
    # scale the data
    data = np.asarray(clusterFeatures[feature])
    # using Z normalization allows the data that is truly far apart to be stretched,
    ## while data that is close together remains clustered.
    ## This does not work well if SMALL relative differences SHOULD make a big difference in clustering,
    ## or if LARGE relative distances should NOT make a big difference in clustering!
    loc, scale = mathUtils.normalizationFactors(data, mode='z')
    clusterFeatures[feature] = (data - loc) / scale
    # weight the data --> NOTE doesn't really work like we think it does!
    # NOTE(review): the metric/ID components are currently unused (underscore-prefixed),
    ## but the unpacking also asserts the expected 'target|metric|id' feature naming,
    ## raising ValueError on malformed names — keep it until weighting uses them.
    _, _metric, _id = feature.split('|', 2)
    if weightingStrategy == 'uniform':
      weight = 1.0
    else:
      # TODO when this gets moved to an input spec, we won't need to check it here.
      ## for now, though, it's the only option.
      self.raiseAnError(
          RuntimeError,
          'Unrecognized weighting strategy: "{}"!'.format(weightingStrategy))
    weights[f] = weight
  # apply the weights only after every feature has been scaled
  for f, feature in enumerate(features):
    clusterFeatures[feature] = clusterFeatures[feature] * weights[f]
  return clusterFeatures
# NOTE(review): this chunk starts mid-script; 'points', 'findSmall', 'findLarge',
# 'entry', and the check* helpers are defined earlier in the file.
checkArray('NDInArray %s entry' %str(findSmall),entry,points[1])
# at tol=1e-3, findSmall is expected to be absent: found flag False, idx/entry None
found,idx,entry = mathUtils.NDInArray(points,findSmall,tol=1e-3)
checkAnswer('NDInArray %s not found' %str(findSmall),int(found),0)
checkType('NDInArray %s no idx' %str(findSmall),idx,None)
checkType('NDInArray %s no entry' %str(findSmall),entry,None)
# even at the very tight tol=1e-8, findLarge is expected at index 0
found,idx,entry = mathUtils.NDInArray(points,findLarge,tol=1e-8)
checkAnswer('NDInArray %s found' %str(findLarge),int(found),1)
checkAnswer('NDInArray %s idx' %str(findLarge),idx,0)
checkArray('NDInArray %s entry' %str(findLarge),entry,points[0])

### check "normalizationFactors"
# fixture lists: constant-zero, constant-nonzero, and evenly spaced data
zeroList = [0,0,0,0,0]
fourList = [4,4,4,4,4]
sequentialList = [0,1,2,3,4]
# zero-variance, zero-mean data: expect (offset, scale) == (0, 1) for every mode
factors = mathUtils.normalizationFactors(zeroList, mode='z')
checkArray('Z-score normalization zeroList: ', factors, (0,1))
factors = mathUtils.normalizationFactors(zeroList, mode='scale')
checkArray('0-1 scaling zeroList: ', factors, (0,1))
factors = mathUtils.normalizationFactors(zeroList, mode='none')
checkArray('No scaling zeroList: ', factors, (0,1))
# zero-variance, nonzero-mean data: z and scale modes expect (4, 4) —
# presumably the scale falls back to the mean when variance is zero; verify
# against mathUtils.normalizationFactors
factors = mathUtils.normalizationFactors(fourList, mode='z')
checkArray('Z-score normalization fourList: ', factors, (4,4))
factors = mathUtils.normalizationFactors(fourList, mode='scale')
checkArray('0-1 scaling fourList: ', factors, (4,4))
# mode 'none' always yields the identity factors (0, 1)
factors = mathUtils.normalizationFactors(fourList, mode='none')
checkArray('No scaling fourList: ', factors, (0,1))
# sequential data: mean 2, population std sqrt(2) ~= 1.41421356237
factors = mathUtils.normalizationFactors(sequentialList, mode='z')
checkArray('Z-score normalization sequentialList: ', factors, (2,1.41421356237))