Example #1
checkAnswer('calculateStats.sampleVariance', moms['sampleVariance'], 0.09127, 1e-5)

### check "historySetWindows"
# TODO I think this takes a HistorySet? Documentation is poor.
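# A hedged sketch of what this check might look like, assuming historySetWindows
# takes a HistorySet-like object plus a window count and returns a list of window
# dictionaries; the signature, argument name, and expected count below are
# assumptions, not verified against the actual API.
#windows = mathUtils.historySetWindows(histSet, nWindows=2)
#checkType('historySetWindows.type', windows, [])
#checkAnswer('historySetWindows.count', len(windows), 2)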

### check "convertNumpyToLists"
datDict = {'a':np.array([1,2,3,4,5]),
           'b':np.array([2,3,4,5]),
           'c':np.array([3,4,5])}
datMat = np.array([
           np.array([1,2,3]),
           np.array([4,5,6]),
           np.array([7,8,9]),
           ])
datAry = np.array([1,2,3,4])
convDict = mathUtils.convertNumpyToLists(datDict)
convMat  = mathUtils.convertNumpyToLists(datMat)
convAry  = mathUtils.convertNumpyToLists(datAry)
for v,(key,value) in enumerate(convDict.items()):
  checkType('convertNumpyToLists.Dict[%i].type' %v,value,[])
  checkArray('convertNumpyToLists.Dict[%i].values' %v,value,datDict[key])
checkType('convertNumpyToLists.Matrix.type',convMat,[])
for e,entry in enumerate(convMat):
  checkType('convertNumpyToLists.Matrix[%i].type' %e,entry,[])
  checkArray('convertNumpyToLists.Matrix[%i].values' %e,entry,datMat[e])
checkType('convertNumpyToLists.Array.type',convAry,[])
checkArray('convertNumpyToLists.Array.values',convAry,datAry)


### check "interpolateFunction"
# TODO some documentation missing
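# A hedged sketch of a possible check, assuming interpolateFunction takes x and y
# arrays plus an options dictionary and returns interpolated values on the same grid;
# the signature, option name, and expected values are assumptions, not verified API.
#xs = np.linspace(0.0, 1.0, 5)
#ys = xs**2
#interp = mathUtils.interpolateFunction(xs, ys, {'interpolationFunction': 'intervalAverage'})
#checkArray('interpolateFunction.values', interp, ys)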
Example #2
  def addGroupDataObjects(self,groupName,attributes,source,upGroup=False,specificVars=None):
    """
      Function to add a DataObject (class DataObjects) or a dictionary into the database
      @ In, groupName, string, group name
      @ In, attributes, dict, dictionary of attributes that must be added as metadata
      @ In, source, dict, source specification: source['name'] is either a DataObject or a dictionary of data
      @ In, upGroup, bool, optional, if True, update an already existing group instead of erroring on the duplicate
      @ In, specificVars, list(str), optional, if not None, a selective list of variables to include in the DB
      @ Out, None
    """
    if not upGroup:
      for index in range(len(self.allGroupPaths)):
        comparisonName = self.allGroupPaths[index]
        splittedPath = comparisonName.split('/')
        for splgroup in splittedPath:
          if groupName == splgroup and splittedPath[0] == self.parentGroupName:
            # the name collides with an existing group: rename it until it is unique
            while groupName in splittedPath:
              groupName = groupName + "_" + str(index)
    parentName = self.parentGroupName.replace('/', '')
    # Create the group
    if parentName != '/':
      parentGroupName = self.__returnParentGroupPath(parentName)
      # Retrieve the parent group from the HDF5 database
      if parentGroupName in self.h5FileW:
        parentGroupObj = self.h5FileW.require_group(parentGroupName)
      else:
        self.raiseAnError(ValueError,'Group named "' + parentGroupName + '" not found in the HDF5 database')
    else:
      parentGroupObj = self.h5FileW

    if isinstance(source['name'],dict):
      # create the group
      if upGroup:
        groups = parentGroupObj.require_group(groupName)
        if (groupName + "_data") in groups:
          del groups[groupName + "_data"]
      else:
        groups = parentGroupObj.create_group(groupName)
      groups.attrs[b'mainClass' ] = b'PythonType'
      groups.attrs[b'sourceType'] = b'Dictionary'
      # This structure is kept to preserve the ability to add an arbitrary dictionary, even one not already divided into input and output sub-sets. A.A.
      # use ONLY the subset of variables if requested
      if 'inputSpaceParams' in source['name']:
        sourceInputs = list(source['name']['inputSpaceParams'].keys())
        if specificVars is not None:
          inputHeaders = list(var for var in sourceInputs if var in specificVars)
        else:
          inputHeaders = sourceInputs
        inputHeaders = list(utils.toBytesIterative(inputHeaders))
        groups.attrs[b'inputSpaceHeaders' ] = inputHeaders

        if specificVars is not None:
          inputValues = list(source['name']['inputSpaceParams'][var] for var in sourceInputs if var in specificVars)
        else:
          inputValues = list(source['name']['inputSpaceParams'].values())
        inputValues = json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list(utils.toBytesIterative(inputValues)))))
        groups.attrs[b'inputSpaceValues'  ] = inputValues

      if 'outputSpaceParams' in source['name']:
        if specificVars is not None:
          outDict = dict((k,v) for k,v in source['name']['outputSpaceParams'].items() if k in specificVars)
        else:
          outDict = source['name']['outputSpaceParams']
      else:
        if specificVars is not None:
          outDict = dict((key,value) for (key,value) in source['name'].items() if key not in ['inputSpaceParams'] and key in specificVars)
        else:
          outDict = dict((key,value) for (key,value) in source['name'].items() if key not in ['inputSpaceParams'])
      outHeaders = utils.toBytesIterative(list(outDict.keys()))
      outValues  = utils.toBytesIterative(list(outDict.values()))
      groups.attrs[b'nParams'           ] = len(outHeaders)
      groups.attrs[b'outputSpaceHeaders'] = outHeaders
      groups.attrs[b'EndGroup'          ] = True
      groups.attrs[b'parentID'          ] = parentName
      # find the longest output entry so all outputs can be packed into one 2-D array
      maxSize = 0
      for value in outValues:
        if type(value) == np.ndarray or type(value).__name__ == 'c1darray':
          actualOne = np.asarray(value).size
        elif type(value) in [int,float,bool,np.float64,np.float32,np.float16,np.int64,np.int32,np.int16,np.int8,np.bool8]:
          actualOne = 1
        else:
          self.raiseAnError(IOError,'The type of the dictionary parameters must be within float, bool, int, numpy.ndarray. Got ' + type(value).__name__)
        if maxSize < actualOne:
          maxSize = actualOne
      groups.attrs[b'nTimeSteps'  ] = maxSize
      dataout = np.zeros((maxSize,len(outHeaders)))
      for index in range(len(outHeaders)):
        if type(outValues[index]) == np.ndarray or type(outValues[index]).__name__ == 'c1darray':
          dataout[0:outValues[index].size,index] =  np.ravel(outValues[index])[:]
        else:
          dataout[0,index] = outValues[index]
      # create the data set
      groups.create_dataset(groupName + "_data", dtype="float", data=dataout)
      # add metadata if present
      for attr in attributes.keys():
        objectToConvert = mathUtils.convertNumpyToLists(attributes[attr])
        converted = json.dumps(objectToConvert)
        if converted and attr != 'name':
          groups.attrs[utils.toBytes(attr)]=converted
      if parentGroupName != "/":
        self.allGroupPaths.append(parentGroupName + "/" + groupName)
        self.allGroupEnds[parentGroupName + "/" + groupName] = True
      else:
        self.allGroupPaths.append("/" + groupName)
        self.allGroupEnds["/" + groupName] = True
    else:
      # Data(structure)
      # Retrieve the headers from the data (inputs and outputs)
      inpParams = source['name'].getInpParametersValues().keys()
      outParams = source['name'].getOutParametersValues().keys()
      if specificVars is not None:
        headersIn  = list(v for v in inpParams if v in specificVars)
        headersOut = list(v for v in outParams if v in specificVars)
      else:
        headersIn  = list(inpParams)
        headersOut = list(outParams)
      # for a "HistorySet" type we create a number of groups = number of HistorySet (compatibility with loading structure)
      if specificVars is not None:
        dataIn  = list(source['name'].getInpParametersValues()[v] for v in inpParams if v in specificVars)
        dataOut = list(source['name'].getOutParametersValues()[v] for v in outParams if v in specificVars)
      else:
        dataIn  = list(source['name'].getInpParametersValues().values())
        dataOut = list(source['name'].getOutParametersValues().values())
      # FIXME unused, but left commented because I'm not sure why they're unused.  PT
      #headersInUnstructured = list(source['name'].getInpParametersValues(self,unstructuredInputs=True).keys())
      #dataInUnstructured    = list(source['name'].getInpParametersValues(self,unstructuredInputs=True).values())
      metadata = source['name'].getAllMetadata()
      if source['name'].type in ['HistorySet','PointSet']:
        groups = []
        if 'HistorySet' in source['name'].type:
          nruns = len(dataIn)
        else:
          nruns = dataIn[0].size
        for run in range(nruns):
          if upGroup:
            groups.append(parentGroupObj.require_group(groupName + '|' + str(run)))
            if (groupName + "_data") in groups[run]:
              del groups[run][groupName+"_data"]
          else:
            groups.append(parentGroupObj.create_group(groupName + '|' +str(run)))

          groups[run].attrs[b'sourceType'] = utils.toBytes(source['name'].type)
          groups[run].attrs[b'mainClass' ] = b'DataObjects'
          groups[run].attrs[b'EndGroup'  ] = True
          groups[run].attrs[b'parentID'  ] = parentName
          if source['name'].type == 'HistorySet':
            groups[run].attrs[b'inputSpaceHeaders' ] = [utils.toBytes(key) for key in dataIn[run].keys()]
            groups[run].attrs[b'outputSpaceHeaders'] = [utils.toBytes(key) for key in dataOut[run].keys()]
            groups[run].attrs[b'inputSpaceValues'  ] = json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list(dataIn[run].values()))))
            groups[run].attrs[b'nParams'           ] = len(dataOut[run].keys())
            #collect the outputs
            dataout = np.zeros((next(iter(dataOut[run].values())).size,len(dataOut[run].values())))
            for param in range(len(dataOut[run].values())):
              dataout[:,param] = list(dataOut[run].values())[param][:]
            groups[run].create_dataset(groupName +'|' +str(run)+"_data" , dtype="float", data=dataout)
            groups[run].attrs[b'nTimeSteps'        ] = next(iter(dataOut[run].values())).size
          else:
            groups[run].attrs[b'inputSpaceHeaders' ] = [utils.toBytes(header) for header in headersIn]
            groups[run].attrs[b'outputSpaceHeaders'] = [utils.toBytes(header) for header in headersOut]
            groups[run].attrs[b'inputSpaceValues'  ] = json.dumps([list(utils.toListFromNumpyOrC1arrayIterative(np.atleast_1d(np.array(dataIn[x][run])).tolist())) for x in range(len(dataIn))])
            groups[run].attrs[b'nParams'           ] = len(headersOut)
            groups[run].attrs[b'nTimeSteps'        ] = 1
            #collect the outputs
            dataout = np.zeros((1,len(dataOut)))
            for param in range(len(dataOut)):
              dataout[0,param] = dataOut[param][run]
            groups[run].create_dataset(groupName +'|' +str(run)+"_data", dtype="float", data=dataout)
          # add metadata if present
          for attr in attributes.keys():
            objectToConvert = mathUtils.convertNumpyToLists(attributes[attr])
            converted = json.dumps(objectToConvert)
            if converted and attr != 'name':
              groups[run].attrs[utils.toBytes(attr)]=converted
          for attr in metadata.keys():
            if len(metadata[attr]) == nruns:
              toProcess = metadata[attr][run]
            else:
              toProcess = metadata[attr]
            if isinstance(toProcess,list) and 'input' in attr.lower() and len(toProcess) > 0 and isinstance(toProcess[0],Files.File):
              objectToConvert = list(a.__getstate__() for a in toProcess)
            elif isinstance(toProcess,Files.File):
              objectToConvert = toProcess.__getstate__()
            else:
              objectToConvert = mathUtils.convertNumpyToLists(toProcess)
            converted = json.dumps(objectToConvert)
            if converted and attr != 'name':
              groups[run].attrs[utils.toBytes(attr)]=converted

          if parentGroupName != "/":
            self.allGroupPaths.append(parentGroupName + "/" + groupName + '|' +str(run))
            self.allGroupEnds[parentGroupName + "/" + groupName + '|' +str(run)] = True
          else:
            self.allGroupPaths.append("/" + groupName + '|' +str(run))
            self.allGroupEnds["/" + groupName + '|' +str(run)] = True
      else:
        self.raiseAnError(IOError,'The function addGroupDataObjects accepts DataObjects or dictionaries as input only!')
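  # A hedged usage sketch (the database and DataObject names below are illustrative,
  # not from the source): adding a PointSet-like DataObject under a new group while
  # keeping only two variables.
  #database.addGroupDataObjects('myGroup',
  #                             {'name': 'myGroup'},
  #                             {'name': myPointSet},
  #                             specificVars=['x','y'])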
Example #3
  def __addSubGroup(self,groupName,attributes,source):
    """
      Function to add a group into the database (Hierarchical)
      @ In, groupName, string, group name
      @ In, attributes, dict, dictionary of attributes that must be added as metadata
      @ In, source, dict, source specification: source['type'] is the format ('csv') and source['name'] is the source file name
      @ Out, None
    """
    for index in range(len(self.allGroupPaths)):
      comparisonName = self.allGroupPaths[index]
      splittedPath=comparisonName.split('/')
      for splgroup in splittedPath:
        if groupName == splgroup and splittedPath[0] == self.parentGroupName:
          self.raiseAnError(IOError,"Group named " + groupName + " already present in database " + self.name + ". new group " + groupName + " is equal to old group " + comparisonName)
    if source['type'] == 'csv':
      # Source in CSV format
      with open(source['name'],'rb') as f:
        # Retrieve the headers of the CSV file
        headers = f.readline().strip(b"\r\n").split(b",")
        # Load the csv into a numpy array (n time steps, n parameters)
        data = np.loadtxt(f,dtype='float',delimiter=',',ndmin=2)
      # If the parentID attribute is present, append a subgroup to the parent group;
      # otherwise this is the main group
      parentID = None
      if 'metadata' in attributes.keys():
        if 'parentID' in attributes['metadata'].keys():
          parentID = attributes['metadata']['parentID']
      else:
        if 'parentID' in attributes.keys():
          parentID = attributes['parentID']

      if parentID:
        parentName = parentID
      else:
        self.raiseAnError(IOError,'Attribute <parentID> not found in the <attributes> dictionary')
      # Find parent group path
      if parentName != '/':
        parentGroupName = self.__returnParentGroupPath(parentName)
      else:
        parentGroupName = parentName
      # Retrieve the parent group from the HDF5 database
      if parentGroupName in self.h5FileW:
        grp = self.h5FileW.require_group(parentGroupName)
      else:
        # try to guess the parentID from the file name
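        # (assumed convention) the directory name appears to encode the parent group
        # name plus a two-character suffix, which is why tail[:-2] is used below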
        head,tail = os.path.split(os.path.dirname(source['name']))
        testParentName = self.__returnParentGroupPath(tail[:-2])
        if testParentName in self.h5FileW:
          grp = self.h5FileW.require_group(testParentName)
        else:
          closestGroup = difflib.get_close_matches(parentName, self.allGroupPaths, n=1, cutoff=0.01)
          errorString  = ' Parent group named "' + str(parentName) + '" not found for loading file ' + str(source['name'])
          errorString += '\n Tried ' + str(tail[:-2]) + ' but it was not found either. All group paths are:\n -' + '\n -'.join(self.allGroupPaths)
          errorString += '\n Closest parent group found is "' + str(closestGroup[0] if len(closestGroup) > 0 else 'None') + '"!'
          self.raiseAnError(ValueError,errorString)
      # The parent group is not the endgroup for this branch
      self.allGroupEnds[parentGroupName] = False
      grp.attrs["EndGroup"]   = False
      self.raiseAMessage('Adding group named "' + groupName + '" in Database "'+ self.name +'"')
      # Create the sub-group
      sgrp = grp.create_group(groupName)
      # Create data set in this new group
      sgrp.create_dataset(groupName+"_data", dtype="float", data=data)
      # Add the metadata
      sgrp.attrs["outputSpaceHeaders"   ] = headers
      sgrp.attrs["nParams"  ] = data[0,:].size
      sgrp.attrs["parent"    ] = "root"
      sgrp.attrs["startTime"] = data[0,0]
      sgrp.attrs["end_time"  ] = data[data[:,0].size-1,0]
      sgrp.attrs["nTimeSteps"      ] = data[:,0].size
      sgrp.attrs["EndGroup"  ] = True
      sgrp.attrs["sourceType"] = source['type']
      # the source is CSV in this branch, so record the source file
      sgrp.attrs["sourceFile"        ] = source['name']
      # add metadata if present
      for attr in attributes.keys():
        if attr == 'metadata':
          if 'SampledVars' in attributes['metadata'].keys():
            inpHeaders = []
            inpValues  = []
            for inkey, invalue in attributes['metadata']['SampledVars'].items():
              if inkey not in headers:
                inpHeaders.append(utils.toBytes(inkey))
                inpValues.append(invalue)
            if len(inpHeaders) > 0:
              sgrp.attrs[b'inputSpaceHeaders'] = inpHeaders
              sgrp.attrs[b'inputSpaceValues' ] = json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list(inpValues))))
        #Files objects are not JSON serializable, so we have to cover that.
        #this doesn't cover all possible circumstance, but it covers the DET case.
        if attr == 'inputFile' and isinstance(attributes[attr][0],Files.File):
          objectToConvert = list(a.__getstate__() for a in attributes[attr])
        else:
          objectToConvert = mathUtils.convertNumpyToLists(attributes[attr])
        converted = json.dumps(objectToConvert)
        if converted and attr != 'name':
          sgrp.attrs[utils.toBytes(attr)]=converted
    else:
      self.raiseAnError(ValueError,source['type'] + ' unknown!')
    # The sub-group is the new ending group
    if parentGroupName != "/":
      self.allGroupPaths.append(parentGroupName + "/" + groupName)
      self.allGroupEnds[parentGroupName + "/" + groupName] = True
    else:
      self.allGroupPaths.append("/" + groupName)
      self.allGroupEnds["/" + groupName] = True
    return
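  # (illustrative, assumed values) after adding subgroup "grp2" under parent "grp1" in
  # a database rooted at "/root", self.allGroupPaths would gain '/root/grp1/grp2' and
  # self.allGroupEnds would mark '/root/grp1/grp2' as True and '/root/grp1' as False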
Example #4
  def __addGroupRootLevel(self,groupName,attributes,source,upGroup=False):
    """
      Function to add a group into the database (root level)
      @ In, groupName, string, group name
      @ In, attributes, dict, dictionary of attributes that must be added as metadata
      @ In, source, dict, source specification: source['type'] is the format ('csv') and source['name'] is the source file name
      @ In, upGroup, bool, optional, updated group?
      @ Out, None
    """
    # Check in the "self.allGroupPaths" list if a group is already present...
    # If so, error out (deleting already present information is not desirable)
    if not upGroup:
      for index in range(len(self.allGroupPaths)):
        comparisonName = self.allGroupPaths[index]
        splittedPath=comparisonName.split('/')
        for splgroup in splittedPath:
          if groupName == splgroup and splittedPath[0] == self.parentGroupName:
            self.raiseAnError(IOError,"Group named " + groupName + " already present in database " + self.name + ". new group " + groupName + " is equal to old group " + comparisonName)

    if source['type'] == 'csv':
      # Source in CSV format
      with open(source['name'],'rb') as f:
        # Retrieve the headers of the CSV file
        firstRow = f.readline().strip(b"\r\n")
        headers = firstRow.split(b",")
        # if the alias system is in use, replace the variable names
        if 'alias' in attributes.keys():
          for aliasType in attributes['alias'].keys():
            for var in attributes['alias'][aliasType].keys():
              if attributes['alias'][aliasType][var].strip() in headers:
                headers[headers.index(attributes['alias'][aliasType][var].strip())] = var.strip()
              else:
                metadataPresent = 'metadata' in attributes.keys() and 'SampledVars' in attributes['metadata'].keys()
                if not (metadataPresent and var.strip() in attributes['metadata']['SampledVars'].keys()):
                  self.raiseAWarning('the ' + aliasType + ' alias "' + var.strip() + '" has been defined but has not been found among the variables!')
        # Load the csv into a numpy array (n time steps, n parameters)
        data = np.loadtxt(f,dtype='float',delimiter=',',ndmin=2)
      # First parent group is the root name
      parentName = self.parentGroupName.replace('/', '')
      # Create the group
      if parentName != '/':
        parentGroupName = self.__returnParentGroupPath(parentName)
        # Retrieve the parent group from the HDF5 database
        if parentGroupName in self.h5FileW:
          rootgrp = self.h5FileW.require_group(parentGroupName)
        else:
          self.raiseAnError(ValueError,'Group named "' + parentGroupName + '" not found for loading file ' + str(source['name']))
        if upGroup:
          grp = rootgrp.require_group(groupName)
          if (groupName + "_data") in grp:
            del grp[groupName + "_data"]
        else:
          grp = rootgrp.create_group(groupName)
      else:
        if upGroup:
          grp = self.h5FileW.require_group(groupName)
        else:
          grp = self.h5FileW.create_group(groupName)
      self.raiseAMessage('Adding group named "' + groupName + '" in Database "'+ self.name +'"')
      # Create dataset in this newly added group
      grp.create_dataset(groupName+"_data", dtype="float", data=data)
      # Add metadata
      grp.attrs["outputSpaceHeaders"     ] = headers
      grp.attrs["nParams"                ] = data[0,:].size
      grp.attrs["parentID"               ] = "root"
      grp.attrs["startTime"              ] = data[0,0]
      grp.attrs["end_time"               ] = data[data[:,0].size-1,0]
      grp.attrs["nTimeSteps"             ] = data[:,0].size
      grp.attrs["EndGroup"               ] = True
      grp.attrs["sourceType"             ] = source['type']
      # the source is CSV in this branch, so record the source file
      grp.attrs["sourceFile"             ] = source['name']
      for attr in attributes.keys():
        if attr == 'metadata':
          if 'SampledVars' in attributes['metadata'].keys():
            inpHeaders = []
            inpValues  = []
            for inkey, invalue in attributes['metadata']['SampledVars'].items():
              if inkey not in headers:
                inpHeaders.append(utils.toBytes(inkey))
                inpValues.append(invalue)
            if len(inpHeaders) > 0:
              grp.attrs[b'inputSpaceHeaders'] = inpHeaders
              grp.attrs[b'inputSpaceValues' ] = json.dumps(list(utils.toListFromNumpyOrC1arrayIterative(list( inpValues))))
        objectToConvert = mathUtils.convertNumpyToLists(attributes[attr])
        # Files objects are not JSON serializable, so replace them with their file names
        if isinstance(objectToConvert,list):
          for o,obj in enumerate(objectToConvert):
            if isinstance(obj,Files.File):
              objectToConvert[o] = obj.getFilename()
        converted = json.dumps(objectToConvert)
        if converted and attr != 'name':
          grp.attrs[utils.toBytes(attr)]=converted
      if "inputFile" in attributes.keys():
        grp.attrs[utils.toString("inputFile")] = utils.toString(" ".join(attributes["inputFile"])) if type(attributes["inputFile"]) == type([]) else utils.toString(attributes["inputFile"])
    else:
      self.raiseAnError(ValueError,source['type'] + " unknown!")
    # Add the group name into the list "self.allGroupPaths" and
    # set the relative bool flag into the dictionary "self.allGroupEnds"
    if parentGroupName != "/":
      self.allGroupPaths.append(parentGroupName + "/" + groupName)
      self.allGroupEnds[parentGroupName + "/" + groupName] = True
    else:
      self.allGroupPaths.append("/" + groupName)
      self.allGroupEnds["/" + groupName] = True