checkAnswer('calculateStats.sampleVariance',moms['sampleVariance'], 0.09127 ,1e-5)

### check "historySetWindows"
# TODO I think this takes a historySet? Documentation is poor.

### check "convertNumpyToLists"
# Exercise conversion of (a) a dict of numpy arrays, (b) a 2-D numpy matrix,
# and (c) a 1-D numpy array; in every case the result should be plain lists.
datDict = {'a':np.array([1,2,3,4,5]), 'b':np.array([2,3,4,5]), 'c':np.array([3,4,5])}
datMat = np.array([ np.array([1,2,3]), np.array([4,5,6]), np.array([7,8,9]), ])
datAry = np.array([1,2,3,4])
convDict = mathUtils.convertNumpyToLists(datDict)
convMat = mathUtils.convertNumpyToLists(datMat)
convAry = mathUtils.convertNumpyToLists(datAry)
# dict: each value must be converted to a list and keep its contents
for v,(key,value) in enumerate(convDict.items()):
  checkType('convertNumpyToList.Dict[%i].type)' %v,value,[])
  checkArray('convertNumpyToList.Dict[%i].values)' %v,value,datDict[key])
# matrix: outer container and every row must be lists with unchanged contents
checkType('convertNumpyToList.Matrix.type)',convMat,[])
for e,entry in enumerate(convMat):
  checkType('convertNumpyToList.Matrix[%i].type)' %e,entry,[])
  # fixed label: this is the Matrix check, not the Dict check (copy-paste error)
  checkArray('convertNumpyToList.Matrix[%i].values)' %e,entry,datMat[e])
# plain 1-D array
checkType('convertNumpyToList.Array.type)',convAry,[])
checkArray('convertNumpyToList.Array.values)',convAry,datAry)

### check "interpolateFunction"
# TODO some documentation missing
def addGroupDataObjects(self,groupName,attributes,source,upGroup=False,specificVars=None):
  """
    Function to add a data (class DataObjects) or Dictionary into the Database.
    The source may be either a plain dictionary (optionally split into
    'inputSpaceParams'/'outputSpaceParams' sub-dicts) or a DataObject of type
    'HistorySet'/'PointSet'; anything else raises an error.
    @ In, groupName, string, group name
    @ In, attributes, dict, dictionary of attributes that must be added as metadata
    @ In, source, dataObject, source data
    @ In, upGroup, bool, optional, updated group?
    @ In, specificVars, list(str), if not None then indicates a selective list of variables to include in DB
    @ Out, None
  """
  if not upGroup:
    # If the requested name already exists anywhere under the parent group,
    # keep appending "_<index>" until it no longer collides (instead of erroring).
    for index in xrange(len(self.allGroupPaths)):
      comparisonName = self.allGroupPaths[index]
      splittedPath=comparisonName.split('/')
      for splgroup in splittedPath:
        if groupName == splgroup and splittedPath[0] == self.parentGroupName:
          found = True
          while found:
            if groupName in splittedPath:
              found = True
            else:
              found = False
            # NOTE(review): the suffix is appended AFTER the membership test, so one
            # extra "_<index>" is added even on the pass where the name became unique.
            groupName = groupName + "_"+ str(index)
          #self.raiseAnError(IOError,"Group named " + groupName + " already present in database " + self.name + ". new group " + groupName + " is equal to old group " + comparisonName)
  parentName = self.parentGroupName.replace('/', '')
  # Create the group
  if parentName != '/':
    parentGroupName = self.__returnParentGroupPath(parentName)
    # Retrieve the parent group from the HDF5 database
    if parentGroupName in self.h5FileW:
      parentGroupObj = self.h5FileW.require_group(parentGroupName)
    else:
      # NOTE(review): parentGroupObj is not defined yet on this branch; this was
      # presumably meant to be parentGroupName — the concatenation itself raises NameError.
      self.raiseAnError(ValueError,'NOT FOUND group named ' + parentGroupObj)
  else:
    parentGroupObj = self.h5FileW
  if type(source['name']) == dict:
    # ---- Source is a plain dictionary ----
    # create the group
    if upGroup:
      groups = parentGroupObj.require_group(groupName)
      del groups[groupName+"_data"]
    else:
      groups = parentGroupObj.create_group(groupName)
    groups.attrs[b'mainClass' ] = b'PythonType'
    groups.attrs[b'sourceType'] = b'Dictionary'
    # I keep this structure here because I want to maintain the possibility to add a
    # whatever dictionary even if not prepared and divided into output and input sub-sets. A.A.
    # use ONLY the subset of variables if requested
    if set(['inputSpaceParams']).issubset(set(source['name'].keys())):
      sourceInputs = source['name']['inputSpaceParams'].keys()
      if specificVars is not None:
        inputHeaders = list(var for var in sourceInputs if var in specificVars)
      else:
        inputHeaders = sourceInputs
      inputHeaders = list(utils.toBytesIterative(inputHeaders))
      groups.attrs[b'inputSpaceHeaders' ] = inputHeaders
      if specificVars is not None:
        inputValues = list(source['name']['inputSpaceParams'][var] for var in sourceInputs if var in specificVars)
      else:
        inputValues = source['name']['inputSpaceParams'].values()
      # input values are stored JSON-serialized in a single attribute
      inputValues = json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list(utils.toBytesIterative(inputValues)))))
      groups.attrs[b'inputSpaceValues' ] = inputValues
    # Everything that is not input space is treated as output space:
    if set(['outputSpaceParams']).issubset(set(source['name'].keys())):
      if specificVars is not None:
        outDict = dict((k,v) for k,v in source['name']['outputSpaceParams'].items() if k in specificVars)
      else:
        outDict = source['name']['outputSpaceParams']
    else:
      if specificVars is not None:
        outDict = dict((key,value) for (key,value) in source['name'].iteritems() if key not in ['inputSpaceParams'] and key in specificVars)
      else:
        outDict = dict((key,value) for (key,value) in source['name'].iteritems() if key not in ['inputSpaceParams'])
    outHeaders = utils.toBytesIterative(list(outDict.keys()))
    outValues = utils.toBytesIterative(list(outDict.values()))
    groups.attrs[b'nParams' ] = len(outHeaders)
    groups.attrs[b'outputSpaceHeaders'] = outHeaders
    groups.attrs[b'EndGroup' ] = True
    groups.attrs[b'parentID' ] = parentName
    # maxSize = longest output array; scalars count as length 1.
    maxSize = 0
    for value in outValues:
      if type(value) == np.ndarray or type(value).__name__ == 'c1darray':
        if maxSize < value.size:
          actualOne = np.asarray(value).size
      elif type(value) in [int,float,bool,np.float64,np.float32,np.float16,np.int64,np.int32,np.int16,np.int8,np.bool8]:
        actualOne = 1
      else:
        self.raiseAnError(IOError,'The type of the dictionary parameters must be within float,bool,int,numpy.ndarray.Got '+type(value).__name__)
      # NOTE(review): when the ndarray branch's inner test is false, actualOne keeps
      # its value from the previous iteration; harmless for the max, but fragile.
      if maxSize < actualOne:
        maxSize = actualOne
    groups.attrs[b'nTimeSteps' ] = maxSize
    # Pack outputs column-wise into a (maxSize, nParams) float matrix;
    # shorter arrays leave trailing zeros in their column.
    dataout = np.zeros((maxSize,len(outHeaders)))
    for index in range(len(outHeaders)):
      # NOTE(review): `value` here is the stale variable from the loop above;
      # this was presumably meant to be outValues[index] — TODO confirm.
      if type(outValues[index]) == np.ndarray or type(value).__name__ == 'c1darray':
        dataout[0:outValues[index].size,index] = np.ravel(outValues[index])[:]
      else:
        dataout[0,index] = outValues[index]
    # create the data set
    groups.create_dataset(groupName + "_data", dtype="float", data=dataout)
    # add metadata if present (JSON-serialized, one attribute per key)
    for attr in attributes.keys():
      objectToConvert = mathUtils.convertNumpyToLists(attributes[attr])
      converted = json.dumps(objectToConvert)
      if converted and attr != 'name':
        groups.attrs[utils.toBytes(attr)]=converted
    # record the new path and mark it as an end group
    if parentGroupName != "/":
      self.allGroupPaths.append(parentGroupName + "/" + groupName)
      self.allGroupEnds[parentGroupName + "/" + groupName] = True
    else:
      self.allGroupPaths.append("/" + groupName)
      self.allGroupEnds["/" + groupName] = True
  else:
    # ---- Source is a Data(structure) ----
    # Retrieve the headers from the data (inputs and outputs)
    inpParams = source['name'].getInpParametersValues().keys()
    outParams = source['name'].getOutParametersValues().keys()
    if specificVars is not None:
      headersIn = list(v for v in inpParams if v in specificVars)
      headersOut = list(v for v in outParams if v in specificVars)
    else:
      headersIn = list(inpParams)
      headersOut = list(outParams)
    # for a "HistorySet" type we create a number of groups = number of HistorySet (compatibility with loading structure)
    if specificVars is not None:
      dataIn = list(source['name'].getInpParametersValues()[v] for v in inpParams if v in specificVars)
      dataOut = list(source['name'].getOutParametersValues()[v] for v in outParams if v in specificVars)
    else:
      dataIn = list(source['name'].getInpParametersValues().values())
      dataOut = list(source['name'].getOutParametersValues().values())
    # FIXME unused, but left commented because I'm not sure why they're unused. PT
    #headersInUnstructured = list(source['name'].getInpParametersValues(self,unstructuredInputs=True).keys())
    #dataInUnstructured = list(source['name'].getInpParametersValues(self,unstructuredInputs=True).values())
    metadata = source['name'].getAllMetadata()
    if source['name'].type in ['HistorySet','PointSet']:
      groups = []
      # HistorySet: one realization per entry of dataIn (list of dicts);
      # PointSet: one realization per element of the first input array.
      if 'HistorySet' in source['name'].type:
        nruns = len(dataIn)
      else:
        nruns = dataIn[0].size
      # one HDF5 group per realization, named "<groupName>|<run>"
      for run in range(nruns):
        if upGroup:
          # NOTE(review): b'|' concatenated with a str groupName is a py2-ism;
          # under py3 this would raise TypeError — compare with the '|' used below.
          groups.append(parentGroupObj.require_group(groupName + b'|' +str(run)))
          if (groupName + "_data") in groups[run]:
            del groups[run][groupName+"_data"]
        else:
          groups.append(parentGroupObj.create_group(groupName + '|' +str(run)))
        groups[run].attrs[b'sourceType'] = utils.toBytes(source['name'].type)
        groups[run].attrs[b'mainClass' ] = b'DataObjects'
        groups[run].attrs[b'EndGroup' ] = True
        groups[run].attrs[b'parentID' ] = parentName
        if source['name'].type == 'HistorySet':
          # per-run headers come from the run's own dicts
          groups[run].attrs[b'inputSpaceHeaders' ] = [utils.toBytes(list(dataIn[run].keys())[i]) for i in range(len(dataIn[run].keys()))]
          groups[run].attrs[b'outputSpaceHeaders'] = [utils.toBytes(list(dataOut[run].keys())[i]) for i in range(len(dataOut[run].keys()))]
          # NOTE(review): the next json.dumps result is discarded; it duplicates the line after it.
          json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list(dataIn[run].values()))))
          groups[run].attrs[b'inputSpaceValues' ] = json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list(dataIn[run].values()))))
          groups[run].attrs[b'nParams' ] = len(dataOut[run].keys())
          #collect the outputs
          # (nTimeSteps, nParams) matrix; assumes every output array in the run
          # has the same length as the first one — TODO confirm.
          dataout = np.zeros((next(iter(dataOut[run].values())).size,len(dataOut[run].values())))
          for param in range(len(dataOut[run].values())):
            dataout[:,param] = list(dataOut[run].values())[param][:]
          groups[run].create_dataset(groupName +'|' +str(run)+"_data" , dtype="float", data=dataout)
          groups[run].attrs[b'nTimeSteps' ] = next(iter(dataOut[run].values())).size
        else:
          # PointSet: one time step per run, headers shared across runs
          groups[run].attrs[b'inputSpaceHeaders' ] = [utils.toBytes(headersIn[i]) for i in range(len(headersIn))]
          groups[run].attrs[b'outputSpaceHeaders'] = [utils.toBytes(headersOut[i]) for i in range(len(headersOut))]
          groups[run].attrs[b'inputSpaceValues' ] = json.dumps([list(utils.toListFromNumpyOrC1arrayIterative(np.atleast_1d(np.array(dataIn[x][run])).tolist())) for x in range(len(dataIn))])
          groups[run].attrs[b'nParams' ] = len(headersOut)
          groups[run].attrs[b'nTimeSteps' ] = 1
          #collect the outputs
          dataout = np.zeros((1,len(dataOut)))
          for param in range(len(dataOut)):
            dataout[0,param] = dataOut[param][run]
          groups[run].create_dataset(groupName +'|' +str(run)+"_data", dtype="float", data=dataout)
        # add metadata if present
        for attr in attributes.keys():
          objectToConvert = mathUtils.convertNumpyToLists(attributes[attr])
          converted = json.dumps(objectToConvert)
          if converted and attr != 'name':
            groups[run].attrs[utils.toBytes(attr)]=converted
        # store DataObject metadata: per-run entries when the metadata list
        # length matches nruns, otherwise the whole entry for every run
        for attr in metadata.keys():
          if len(metadata[attr]) == nruns:
            toProcess = metadata[attr][run]
          else:
            toProcess = metadata[attr]
          # Files.File objects are not JSON-serializable; store their __getstate__ instead
          if type(toProcess).__name__ == 'list' and 'input' in attr.lower() and isinstance(toProcess[0],Files.File):
            objectToConvert = list(a.__getstate__() for a in toProcess)
          elif isinstance(toProcess,Files.File):
            objectToConvert = toProcess.__getstate__()
          else:
            objectToConvert = mathUtils.convertNumpyToLists(toProcess)
          converted = json.dumps(objectToConvert)
          if converted and attr != 'name':
            groups[run].attrs[utils.toBytes(attr)]=converted
        # record the new per-run path and mark it as an end group
        if parentGroupName != "/":
          self.allGroupPaths.append(parentGroupName + "/" + groupName + '|' +str(run))
          self.allGroupEnds[parentGroupName + "/" + groupName + '|' +str(run)] = True
        else:
          self.allGroupPaths.append("/" + groupName + '|' +str(run))
          self.allGroupEnds["/" + groupName + '|' +str(run)] = True
    else:
      self.raiseAnError(IOError,'The function addGroupDataObjects accepts Data(s) or dictionaries as inputs only!!!!!')
def __addSubGroup(self,groupName,attributes,source):
  """
    Function to add a group into the database (Hierarchical). The new group is
    attached under the parent identified by attributes['parentID'] (or
    attributes['metadata']['parentID']) and filled from a CSV source file.
    @ In, groupName, string, group name
    @ In, attributes, dict, dictionary of attributes that must be added as metadata
    @ In, source, File object, source data
    @ Out, None
  """
  # Refuse to add a group whose name already exists under the parent group.
  for index in xrange(len(self.allGroupPaths)):
    comparisonName = self.allGroupPaths[index]
    splittedPath=comparisonName.split('/')
    for splgroup in splittedPath:
      if groupName == splgroup and splittedPath[0] == self.parentGroupName:
        self.raiseAnError(IOError,"Group named " + groupName + " already present in database " + self.name + ". new group " + groupName + " is equal to old group " + comparisonName)
  if source['type'] == 'csv':
    # Source in CSV format
    # NOTE(review): f is never closed; consider a with-block or f.close().
    f = open(source['name'],'rb')
    # Retrieve the headers of the CSV file (bytes, since file is opened binary)
    headers = f.readline().split(b",")
    # Load the csv into a numpy array(n time steps, n parameters)
    data = np.loadtxt(f,dtype='float',delimiter=',',ndmin=2)
    # Check if the parent attribute is not null
    # In this case append a subgroup to the parent group
    # Otherwise => it's the main group
    parentID = None
    if 'metadata' in attributes.keys():
      if 'parentID' in attributes['metadata'].keys():
        parentID = attributes['metadata']['parentID']
    else:
      if 'parentID' in attributes.keys():
        parentID = attributes['parentID']
    if parentID:
      parentName = parentID
    else:
      self.raiseAnError(IOError,'NOT FOUND attribute <parentID> into <attributes> dictionary')
    # Find parent group path
    if parentName != '/':
      parentGroupName = self.__returnParentGroupPath(parentName)
    else:
      parentGroupName = parentName
    # Retrieve the parent group from the HDF5 database
    if parentGroupName in self.h5FileW:
      grp = self.h5FileW.require_group(parentGroupName)
    else:
      # try to guess the parentID from the file name
      # (tail[:-2] strips the last two characters of the directory name;
      # presumably a run-number suffix — TODO confirm against the caller)
      head,tail = os.path.split(os.path.dirname(source['name']))
      testParentName = self.__returnParentGroupPath(tail[:-2])
      if testParentName in self.h5FileW:
        grp = self.h5FileW.require_group(testParentName)
      else:
        # give the user the closest-matching existing path in the error
        closestGroup = difflib.get_close_matches(parentName, self.allGroupPaths, n=1, cutoff=0.01)
        errorString = ' NOT FOUND parent group named "' + str(parentName)+'" for loading file '+str(source['name'])
        errorString+= '\n Tried '+str(tail[:-2])+ ' but not found as well. All group paths are:\n -'+'\n -'.join(self.allGroupPaths)
        errorString+= '\n Closest parent group found is "'+str(closestGroup[0] if len(closestGroup) > 0 else 'None')+'"!'
        self.raiseAnError(ValueError,errorString)
    # The parent group is not the endgroup for this branch
    self.allGroupEnds[parentGroupName] = False
    grp.attrs["EndGroup"] = False
    self.raiseAMessage('Adding group named "' + groupName + '" in Database "'+ self.name +'"')
    # Create the sub-group
    sgrp = grp.create_group(groupName)
    # Create data set in this new group
    sgrp.create_dataset(groupName+"_data", dtype="float", data=data)
    # Add the metadata
    sgrp.attrs["outputSpaceHeaders" ] = headers
    sgrp.attrs["nParams" ] = data[0,:].size
    sgrp.attrs["parent" ] = "root"
    # first column is assumed to be the time axis — TODO confirm
    sgrp.attrs["startTime"] = data[0,0]
    sgrp.attrs["end_time" ] = data[data[:,0].size-1,0]
    sgrp.attrs["nTimeSteps" ] = data[:,0].size
    sgrp.attrs["EndGroup" ] = True
    sgrp.attrs["sourceType"] = source['type']
    if source['type'] == 'csv':
      sgrp.attrs["sourceFile"] = source['name']
    # add metadata if present
    for attr in attributes.keys():
      if attr == 'metadata':
        if 'SampledVars' in attributes['metadata'].keys():
          # sampled variables not already among the CSV headers become input space
          # NOTE(review): inkey (str) is tested against bytes headers; under py3 this
          # comparison would never match — py2-ism, confirm before porting.
          inpHeaders = []
          inpValues = []
          for inkey, invalue in attributes['metadata']['SampledVars'].items():
            if inkey not in headers:
              inpHeaders.append(utils.toBytes(inkey))
              inpValues.append(invalue)
          if len(inpHeaders) > 0:
            sgrp.attrs[b'inputSpaceHeaders'] = inpHeaders
            sgrp.attrs[b'inputSpaceValues' ] = json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list(inpValues))))
      #Files objects are not JSON serializable, so we have to cover that.
      #this doesn't cover all possible circumstance, but it covers the DET case.
      if attr == 'inputFile' and isinstance(attributes[attr][0],Files.File):
        objectToConvert = list(a.__getstate__() for a in attributes[attr])
      else:
        objectToConvert = mathUtils.convertNumpyToLists(attributes[attr])
      converted = json.dumps(objectToConvert)
      if converted and attr != 'name':
        sgrp.attrs[utils.toBytes(attr)]=converted
  else:
    # non-csv sources are silently ignored
    # NOTE(review): on this branch parentGroupName is never assigned, so the
    # bookkeeping below raises NameError — confirm whether non-csv is reachable.
    pass
  # The sub-group is the new ending group
  if parentGroupName != "/":
    self.allGroupPaths.append(parentGroupName + "/" + groupName)
    self.allGroupEnds[parentGroupName + "/" + groupName] = True
  else:
    self.allGroupPaths.append("/" + groupName)
    self.allGroupEnds["/" + groupName] = True
  return
def __addGroupRootLevel(self,groupName,attributes,source,upGroup=False): """ Function to add a group into the database (root level) @ In, groupName, string, group name @ In, attributes, dict, dictionary of attributes that must be added as metadata @ In, source, File object, source file @ In, upGroup, bool, optional, updated group? @ Out, None """ # Check in the "self.allGroupPaths" list if a group is already present... # If so, error (Deleting already present information is not desiderable) if not upGroup: for index in xrange(len(self.allGroupPaths)): comparisonName = self.allGroupPaths[index] splittedPath=comparisonName.split('/') for splgroup in splittedPath: if groupName == splgroup and splittedPath[0] == self.parentGroupName: self.raiseAnError(IOError,"Group named " + groupName + " already present in database " + self.name + ". new group " + groupName + " is equal to old group " + comparisonName) if source['type'] == 'csv': # Source in CSV format f = open(source['name'],'rb') # Retrieve the headers of the CSV file firstRow = f.readline().strip(b"\r\n") #firstRow = f.readline().translate(None,"\r\n") headers = firstRow.split(b",") # if there is the alias system, replace the variable name if 'alias' in attributes.keys(): for aliasType in attributes['alias'].keys(): for var in attributes['alias'][aliasType].keys(): if attributes['alias'][aliasType][var].strip() in headers: headers[headers.index(attributes['alias'][aliasType][var].strip())] = var.strip() else: metadataPresent = True if 'metadata' in attributes.keys() and 'SampledVars' in attributes['metadata'].keys() else False if not (metadataPresent and var.strip() in attributes['metadata']['SampledVars'].keys()): self.raiseAWarning('the ' + aliasType +' alias"'+var.strip()+'" has been defined but has not been found among the variables!') # Load the csv into a numpy array(n time steps, n parameters) data = np.loadtxt(f,dtype='float',delimiter=',',ndmin=2) # First parent group is the root name parentName = 
self.parentGroupName.replace('/', '') # Create the group if parentName != '/': parentGroupName = self.__returnParentGroupPath(parentName) # Retrieve the parent group from the HDF5 database if parentGroupName in self.h5FileW: rootgrp = self.h5FileW.require_group(parentGroupName) else: self.raiseAnError(ValueError,'NOT FOUND group named "' + parentGroupName+'" for loading file '+str(source['name'])) if upGroup: grp = rootgrp.require_group(groupName) del grp[groupName+"_data"] else: grp = rootgrp.create_group(groupName) else: if upGroup: grp = self.h5FileW.require_group(groupName) else: grp = self.h5FileW.create_group(groupName) self.raiseAMessage('Adding group named "' + groupName + '" in DataBase "'+ self.name +'"') # Create dataset in this newly added group grp.create_dataset(groupName+"_data", dtype="float", data=data) # Add metadata grp.attrs["outputSpaceHeaders" ] = headers grp.attrs["nParams" ] = data[0,:].size grp.attrs["parentID" ] = "root" grp.attrs["startTime" ] = data[0,0] grp.attrs["end_time" ] = data[data[:,0].size-1,0] grp.attrs["nTimeSteps" ] = data[:,0].size grp.attrs["EndGroup" ] = True grp.attrs["sourceType" ] = source['type'] if source['type'] == 'csv': grp.attrs["sourceFile"] = source['name'] for attr in attributes.keys(): if attr == 'metadata': if 'SampledVars' in attributes['metadata'].keys(): inpHeaders = [] inpValues = [] for inkey, invalue in attributes['metadata']['SampledVars'].items(): if inkey not in headers: inpHeaders.append(utils.toBytes(inkey)) inpValues.append(invalue) if len(inpHeaders) > 0: grp.attrs[b'inputSpaceHeaders'] = inpHeaders grp.attrs[b'inputSpaceValues' ] = json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list( inpValues)))) objectToConvert = mathUtils.convertNumpyToLists(attributes[attr]) for o,obj in enumerate(objectToConvert): if isinstance(obj,Files.File): objectToConvert[o]=obj.getFilename() converted = json.dumps(objectToConvert) if converted and attr != 'name': grp.attrs[utils.toBytes(attr)]=converted 
#decoded = json.loads(grp.attrs[utils.toBytes(attr)]) if "inputFile" in attributes.keys(): grp.attrs[utils.toString("inputFile")] = utils.toString(" ".join(attributes["inputFile"])) if type(attributes["inputFile"]) == type([]) else utils.toString(attributes["inputFile"]) else: self.raiseAnError(ValueError,source['type'] + " unknown!") # Add the group name into the list "self.allGroupPaths" and # set the relative bool flag into the dictionary "self.allGroupEnds" if parentGroupName != "/": self.allGroupPaths.append(parentGroupName + "/" + groupName) self.allGroupEnds[parentGroupName + "/" + groupName] = True else: self.allGroupPaths.append("/" + groupName) self.allGroupEnds["/" + groupName] = True