def new_tasks(self, extra):
    """
    Generate the parameter-search job(s) for the configured problem type.

    This is a generator.  Depending on ``self.params.problemType`` it
    yields tuples ``(jobname, gParaSearchDriver, args, kwargs)``:

    * ``'one4all'``: a job named ``one4all`` staged in the current
      working directory;
    * ``'one4eachPair'``: one job per country pair, named
      ``<Ctry1>-<Ctry2>``, each with its own stage directory;
    * ``'one4eachCtry'``: a job named ``one4eachCtry`` staged in the
      current working directory.

    For every country pair, the ``Ctry`` entries of ``input/markovA.in``
    and ``input/markovB.in`` in ``self.params.initial`` are updated,
    ``self.getCtryParas`` is called, and the base directory is copied
    into the stage directory.

    When the generator is resumed after its last ``yield``, the
    attributes of ``deKenPrice`` are filled in from ``self.params``.

    :param extra: dictionary of extra keyword arguments; a copy of it,
        with ``output_dir`` set to the stage directory, accompanies each
        yielded job.
    """
    logger.info('entering new_tasks')

    # --- local helpers ---------------------------------------------------

    def parse_norm(raw):
        # Integer norms come as digits, the sup-norm as 'inf' / 'np.inf';
        # anything else (in particular custom norms) is passed through.
        try:
            return int(raw)
        except ValueError:
            if raw in ('np.inf', 'inf'):
                return np.inf
            return raw

    def split_bounds(tokens):
        # `tokens` alternates lower and upper bounds: lo0 hi0 lo1 hi1 ...
        lower = np.array(tokens[0::2], dtype='float64')
        upper = np.array(tokens[1::2], dtype='float64')
        return lower, upper

    def load_gdp_table():
        # Moments table from the empirical analysis.
        return tableDict.fromTextFile(
            fileIn=os.path.join(self.params.pathEmpirical,
                                'outputInput/momentTable/Gdp/gdpMoments.csv'),
            delim=',', width=20)

    def stage_pair(ctry1, ctry2, destination):
        # Set Ctry information for this run.
        update_parameter_in_file(os.path.join(baseDir, 'input/markovA.in'),
                                 'Ctry', 0, ctry1, 'space-separated')
        update_parameter_in_file(os.path.join(baseDir, 'input/markovB.in'),
                                 'Ctry', 0, ctry2, 'space-separated')
        # Get the correct Ctry Paras into base dir.
        self.getCtryParas(baseDir, ctry1, ctry2)
        # Copy base dir.  A failed copy is only logged (best effort), but
        # unrelated exceptions are no longer swallowed, and the message
        # names the destination, which is the path that may already exist.
        try:
            shutil.copytree(baseDir, destination)
        except (OSError, shutil.Error):
            logger.info('%s already exists' % destination)

    def driver_args(stage_dir, base_dir, x_vars, search_domain,
                    analyze_results, constraint, plot_table, comb_overviews):
        # Positional argument list handed over with each yielded job.
        return [
            self.params.executable,
            stage_dir,
            self.params.architecture,
            base_dir,
            x_vars,
            self.params.initialPop,
            self.params.nPopulation,
            search_domain,
            self.params.solverVerb,
            self.params.problemType,
            self.params.pathEmpirical,
            self.params.itermax,
            self.params.xConvCrit,
            self.params.yConvCrit,
            self.params.makePlots,
            self.params.optStrategy,
            self.params.fWeight,
            self.params.fCritical,
            self.params.countryList,
            analyze_results,
            constraint,
            plot_table,
            comb_overviews,
        ]

    # --- logging setup ---------------------------------------------------

    # Adjust logging level for stream handler. Could be extracted into a
    # general function.  Iterate over a copy: removing from a list while
    # iterating over it skips the element following each removal.
    for handler in list(logger.handlers):
        if not isinstance(handler, logbook.FileHandler):
            logger.handlers.remove(handler)
    logger.handlers.append(
        logbook.StreamHandler(stream=sys.stdout,
                              level=self.params.solverVerb.upper(),
                              format_string='{record.message}',
                              bubble=True))

    baseDir = self.params.initial
    xVars = self.params.xVars
    countryList = self.params.countryList.split()

    # Materialise the index pairs once; they are traversed several times
    # below, which would silently yield nothing on later passes if
    # getIndex returned a one-shot iterator.
    ctryIndices = list(getIndex(base=[len(countryList), len(countryList)],
                                restr='lowerTr'))
    for ctryIndex in ctryIndices:
        logger.info(countryList[ctryIndex[0]] + countryList[ctryIndex[1]])

    # Compute domain (a list, because it is consumed more than once).
    xVarsDom = self.params.xVarsDom.split()
    lowerBds, upperBds = split_bounds(xVarsDom)
    domain = list(zip(lowerBds, upperBds))

    # NOTE(review): the nesting of the `yield` in the 'one4all' and
    # 'one4eachCtry' branches (one job, emitted after all pairs have been
    # staged) was reconstructed from the code's logic -- confirm against
    # the original layout.

    # Make problem type specific adjustments.
    if self.params.problemType == 'one4all':
        gdpTable = load_gdp_table()
        jobname = 'one4all'
        logger.info('%s' % self.params.norm)
        norm = parse_norm(self.params.norm)
        logger.info('using norm %s' % (norm))
        path_to_stage_dir = os.getcwd()
        analyzeResults = anaOne4all(len(ctryIndices), norm=norm)
        nlc = nlcOne4all(gdpTable=gdpTable, ctryList=countryList,
                         domain=domain,
                         logFile=os.path.join(path_to_stage_dir, 'nlc.log'))
        combOverviews = combineOverviews.combineOverviews(
            overviewSimuFile='eSigmaTable', tableName='ag_eSigmaTable',
            sortKeys=['norm'])
        plot3dTable = combineOverviews.plotTable(
            tablePath=os.path.join(path_to_stage_dir, 'ag_eSigmaTable'),
            savePath=os.path.join(path_to_stage_dir, 'scatter3d'))
        plot3dTable.columnNames = ['E', 'sigma', 'norm']

        for ctryIndex in ctryIndices:
            Ctry1 = countryList[ctryIndex[0]]
            Ctry2 = countryList[ctryIndex[1]]
            stage_pair(Ctry1, Ctry2,
                       os.path.join(path_to_stage_dir,
                                    'base' + Ctry1 + Ctry2))

        kwargs = extra.copy()
        kwargs['output_dir'] = path_to_stage_dir

        # yield job
        yield (jobname, gParaSearchDriver,
               driver_args(path_to_stage_dir, baseDir, self.params.xVars,
                           domain, analyzeResults, nlc, plot3dTable,
                           combOverviews),
               kwargs)

    elif self.params.problemType == 'one4eachPair':
        for ctryIndex in ctryIndices:
            Ctry1 = countryList[ctryIndex[0]]
            Ctry2 = countryList[ctryIndex[1]]
            logger.info(Ctry1 + Ctry2)
            jobname = Ctry1 + '-' + Ctry2

            # set stage dir.
            path_to_stage_dir = self.make_directory_path(self.params.output,
                                                         jobname)
            gc3libs.utils.mkdir(path_to_stage_dir)

            # Get moments table from empirical analysis
            gdpTable = load_gdp_table()

            ctryBaseDir = os.path.join(path_to_stage_dir, 'base')
            stage_pair(Ctry1, Ctry2, ctryBaseDir)

            # Pass ctry information to nlc
            analyzeResults = anaOne4eachPair
            nlc = nlcOne4eachPair(gdpTable=gdpTable,
                                  ctryPair=[Ctry1, Ctry2],
                                  domain=domain,
                                  logFile=os.path.join(path_to_stage_dir,
                                                       'nlc.log'))
            combOverviews = combineOverviews.combineOverviews(
                overviewSimuFile='overviewSimu', tableName='agTable',
                sortKeys=['normDev'])
            plot3dTable = combineOverviews.plotTable(
                tablePath=os.path.join(path_to_stage_dir, 'agTable'),
                savePath=os.path.join(path_to_stage_dir, 'scatter3d'))
            plot3dTable.columnNames = ['EA', 'sigmaA', 'normDev']

            kwargs = extra.copy()
            kwargs['output_dir'] = path_to_stage_dir

            # yield job
            yield (jobname, gParaSearchDriver,
                   driver_args(path_to_stage_dir, ctryBaseDir,
                               self.params.xVars, domain, analyzeResults,
                               nlc, plot3dTable, combOverviews),
                   kwargs)

    elif self.params.problemType == 'one4eachCtry':
        gdpTable = load_gdp_table()

        # With exactly two x variables and more than one country, replicate
        # the variable names and their bounds once per country.
        if len(xVars.split()) == 2 and len(countryList) > 1:
            if self.params.xVars[-1:] != " ":
                xVars = (self.params.xVars + ' ') * len(countryList)
                xVars = xVars[:-1]
            else:
                xVars = self.params.xVars * len(countryList)
            xVarsDom = self.params.xVarsDom.split() * len(countryList)
        else:
            xVars = self.params.xVars
            # split() already ignores trailing whitespace; slicing off the
            # last character first would truncate the final bound whenever
            # the string does not end in a blank.
            xVarsDom = self.params.xVarsDom.split()

        jobname = 'one4eachCtry'
        lowerBds, upperBds = split_bounds(xVarsDom)
        domain = list(zip(lowerBds, upperBds))

        norm = parse_norm(self.params.norm)
        logger.info('using norm %s' % (norm))
        path_to_stage_dir = os.getcwd()
        analyzeResults = anaOne4eachCtry(countryList, len(ctryIndices),
                                         norm=norm)
        nlc = nlcOne4eachCtry(gdpTable=gdpTable, ctryList=countryList,
                              domain=domain,
                              logFile=os.path.join(path_to_stage_dir,
                                                   'nlc.log'))
        combOverviews = combineOverviews.combineOverviews(
            overviewSimuFile='eSigmaTable', tableName='ag_eSigmaTable',
            sortKeys=['norm'])
        deKenPrice.plotPopulation = plotPopOne4eachCtry(countryList)
        # No 3d table plot for this problem type.
        plot3dTable = emptyFun

        for ctryIndex in ctryIndices:
            Ctry1 = countryList[ctryIndex[0]]
            Ctry2 = countryList[ctryIndex[1]]
            stage_pair(Ctry1, Ctry2,
                       os.path.join(path_to_stage_dir,
                                    'base' + Ctry1 + Ctry2))

        kwargs = extra.copy()
        kwargs['output_dir'] = path_to_stage_dir

        # yield job
        yield (jobname, gParaSearchDriver,
               driver_args(path_to_stage_dir, baseDir, xVars, domain,
                           analyzeResults, nlc, plot3dTable, combOverviews),
               kwargs)

    # Set solver variables.  This part runs when the generator is resumed
    # after the last yield.
    nXvars = len(xVars.split())
    deKenPrice.I_NP = int(self.params.nPopulation)
    deKenPrice.F_weight = float(self.params.fWeight)
    deKenPrice.F_CR = float(self.params.fCritical)
    deKenPrice.I_D = int(nXvars)
    # `domain` is always built from `xVarsDom`, so the bounds can be taken
    # from `xVarsDom` for every problem type.
    deKenPrice.lowerBds, deKenPrice.upperBds = split_bounds(xVarsDom)
    deKenPrice.I_itermax = int(self.params.itermax)
    deKenPrice.F_VTR = float(self.params.yConvCrit)
    deKenPrice.I_strategy = int(self.params.optStrategy)
    deKenPrice.I_plotting = int(self.params.makePlots)
    deKenPrice.xConvCrit = float(self.params.xConvCrit)
    deKenPrice.workingDir = path_to_stage_dir
    deKenPrice.verbosity = self.params.solverVerb

    logger.info('done with new_tasks')
def __init__(self, inParaCombos, iteration, pathToExecutable, pathToStageDir, architecture, baseDir, xVars,
             solverVerb, problemType, analyzeResults, ctryList, **extra_args):
    '''
    Generate a list of tasks and initialize a ParallelTaskCollection with them.

    The current population is saved to ``Iteration-<iteration>/curPopulation``
    below `pathToStageDir`, a para.loop file describing the parameter
    substitutions is written with ``self.writeParaLoop``, and
    ``self.generateTaskList`` turns that file into the task list.
    The variable names written to the para.loop file are hard coded in this
    method.

    inParaCombos:     List of tuples defining the parameter combinations.
    iteration:        Current iteration number.
    pathToExecutable: Path to the executable (the external program to be called).
    pathToStageDir:   Root path. Usually os.getcwd()
    architecture:     32 or 64 bit.
    baseDir:          Directory in which the input files are located.
    xVars:            Names of the x variables.
    solverVerb:       Logger verbosity.
    problemType:      Forward premium specific flag to determine which case to look at.
    analyzeResults:   Function to use to analyze the emerging output.
    ctryList:         Forward premium specific list of ctrys to look at.
    extra_args:       Further keyword arguments; must contain 'jobname'.

    Raises ValueError for problem type 'one4eachPair' when `xVars` does not
    contain both 'EA' (without 'EB') and 'sigmaA' (without 'sigmaB').
    '''

    logger.debug('entering gParaSearchParalell.__init__')

    # Set up initial variables and set the correct methods.
    self.pathToStageDir = pathToStageDir
    self.problemType = problemType
    self.executable = pathToExecutable
    self.architecture = architecture
    self.baseDir = baseDir
    self.verbosity = solverVerb.upper()
    self.xVars = xVars
    self.n = len(self.xVars.split())
    self.analyzeResults = analyzeResults
    self.ctryList = ctryList
    self.iteration = iteration
    self.jobname = 'evalSolverGuess' + '-' + extra_args['jobname'] + '-' + str(self.iteration)
    self.extra_args = extra_args

    # --- createJobs_x ---

    # Log activity.  Read the clock once so that date and time cannot
    # come from different sides of midnight.
    now = datetime.datetime.now()
    dateString = '{0:04d}-{1:02d}-{2:02d}-{3:02d}-{4:02d}-{5:02d}'.format(
        now.year, now.month, now.day, now.hour, now.minute, now.second)
    logger.debug('Establishing parallel task on %s' % dateString)

    # Enter an iteration specific folder
    self.iterationFolder = os.path.join(self.pathToStageDir, 'Iteration-' + str(self.iteration))
    try:
        os.mkdir(self.iterationFolder)
    except OSError:
        print('%s already exists' % self.iterationFolder)

    # save population to file
    np.savetxt(os.path.join(self.iterationFolder, 'curPopulation'), inParaCombos, delimiter = '  ')

    # Translate the list of parameter combinations into one comma
    # separated string of values per variable, to be fed into the
    # para.loop file.
    ### Should make more precise string conversion.
    vals = [', '.join(str(paraCombo[ixVar]) for paraCombo in inParaCombos)
            for ixVar in range(len(inParaCombos[0]))]

    # Make problem specific adjustments to the paraLoop file.
    if self.problemType == 'one4all':
        print('one4all')
        variables = ['Ctry', 'Ctry', 'EA', 'EB', 'sigmaA', 'sigmaB']
        groups = [0, 0, 1, 1, 1, 1]
        groupRestrs = ['lowerTr', 'lowerTr', 'diagnol', 'diagnol', 'diagnol', 'diagnol']
        writeVals = [", ".join(self.ctryList), ", ".join(self.ctryList),
                     vals[0], vals[0], vals[1], vals[1]]
        self.variables = ['EA', 'sigmaA']
        self.paraCombos = inParaCombos
        paraFiles = ['input/markovA.in', 'input/markovB.in',
                     'input/parameters.in', 'input/parameters.in',
                     'input/parameters.in', 'input/parameters.in']
        paraFileRegex = ['space-separated', 'space-separated',
                         'bar-separated', 'bar-separated',
                         'bar-separated', 'bar-separated']
        self.analyzeResults.tablePath = self.iterationFolder

    elif self.problemType == 'one4eachPair':
        print('one4eachPair')
        # Check if EA or sigmaA are alone in the specified parameters.
        # If so make diagnol adjustments.
        variables = []
        groups = []
        groupRestrs = []
        writeVals = []
        paraCombosEA = None
        paraCombosSigmaA = None
        if 'EA' in self.xVars and 'EB' not in self.xVars:
            variables.extend(['EA', 'EB'])
            groups.extend(['0', '0'])
            groupRestrs.extend(['diagnol', 'diagnol'])
            writeVals.extend([vals[0], vals[0]])
            paraCombosEA = [np.append(ele[0], ele[0]) for ele in inParaCombos]
        if 'sigmaA' in self.xVars and 'sigmaB' not in self.xVars:
            variables.extend(['sigmaA', 'sigmaB'])
            groups.extend(['0', '0'])
            groupRestrs.extend(['diagnol', 'diagnol'])
            writeVals.extend([vals[1], vals[1]])
            paraCombosSigmaA = [np.append(ele[1], ele[1]) for ele in inParaCombos]
        # The table layout below needs both pairs; without this check the
        # code used to fail further down with a NameError.
        if paraCombosEA is None or paraCombosSigmaA is None:
            raise ValueError(
                "Problem type 'one4eachPair' requires xVars to contain 'EA'"
                " (without 'EB') and 'sigmaA' (without 'sigmaB'); got '%s'"
                % self.xVars)
        self.variables = variables
        # Prepare paraCombos matching to resulting table. Used in analyzeOverviewTable
        # !!! This should be dependent on problem type or on missing variables in xvars. !!!
        self.paraCombos = [np.append(EA, sA)
                           for EA, sA in zip(paraCombosEA, paraCombosSigmaA)]
        paraFiles = ['input/parameters.in', 'input/parameters.in',
                     'input/parameters.in', 'input/parameters.in']
        paraFileRegex = ['bar-separated', 'bar-separated',
                         'bar-separated', 'bar-separated']

    elif self.problemType == 'one4eachCtry':
        print('one4eachCtry')
        ctry1List = []
        ctry2List = []
        EAList = []
        EBList = []
        sigmaAList = []
        sigmaBList = []
        self.paraCombos = []

        # Materialise the index pairs: they are traversed once per
        # parameter combination.
        ctryIndices = list(getIndex([len(ctryList), len(ctryList)], 'lowerTr'))
        for paraCombo in inParaCombos:
            # Each combination stores (E, sigma) per country, i.e. the
            # values of country k sit at positions 2k and 2k + 1.
            ctry1ListCombo = [ctryList[ctryIndex[0]] for ctryIndex in ctryIndices]
            ctry2ListCombo = [ctryList[ctryIndex[1]] for ctryIndex in ctryIndices]
            EAListCombo = [paraCombo[0 + 2 * ctryIndex[0]] for ctryIndex in ctryIndices]
            sigmaAListCombo = [paraCombo[1 + 2 * ctryIndex[0]] for ctryIndex in ctryIndices]
            EBListCombo = [paraCombo[0 + 2 * ctryIndex[1]] for ctryIndex in ctryIndices]
            sigmaBListCombo = [paraCombo[1 + 2 * ctryIndex[1]] for ctryIndex in ctryIndices]
            self.paraCombos.append(list(zip(ctry1ListCombo, ctry2ListCombo,
                                            EAListCombo, sigmaAListCombo,
                                            EBListCombo, sigmaBListCombo)))
            ctry1List.extend(ctry1ListCombo)
            ctry2List.extend(ctry2ListCombo)
            EAList.extend(map(str, EAListCombo))
            EBList.extend(map(str, EBListCombo))
            sigmaAList.extend(map(str, sigmaAListCombo))
            sigmaBList.extend(map(str, sigmaBListCombo))

        variables = ['Ctry', 'Ctry', 'EA', 'EB', 'sigmaA', 'sigmaB']
        groups = [0, 0, 0, 0, 0, 0]
        groupRestrs = ['diagnol', 'diagnol', 'diagnol', 'diagnol', 'diagnol', 'diagnol']
        writeVals = [", ".join(ctry1List), ", ".join(ctry2List),
                     ", ".join(EAList), ", ".join(EBList),
                     ", ".join(sigmaAList), ", ".join(sigmaBList)]
        paraFiles = ['input/markovA.in', 'input/markovB.in',
                     'input/parameters.in', 'input/parameters.in',
                     'input/parameters.in', 'input/parameters.in']
        paraFileRegex = ['space-separated', 'space-separated',
                         'bar-separated', 'bar-separated',
                         'bar-separated', 'bar-separated']
        self.analyzeResults.tablePath = self.iterationFolder
        # variable list passed to analyzeOverviewTables
        self.variables = ['EA', 'sigmaA', 'EB', 'sigmaB']
        print('Done setting up one4eachCtry. ')

    # Write a para.loop file to generate grid jobs
    para_loop = self.writeParaLoop(variables = variables,
                                   groups = groups,
                                   groupRestrs = groupRestrs,
                                   vals = writeVals,
                                   desPath = os.path.join(self.iterationFolder, 'para.loopTmp'),
                                   paraFiles = paraFiles,
                                   paraFileRegex = paraFileRegex)

    paraLoop_fp.__init__(self, verbosity = self.verbosity)
    tasks = self.generateTaskList(para_loop, self.iterationFolder)
    ParallelTaskCollection.__init__(self, self.jobname, tasks)
def process_para_file(self, path_to_para_loop):
    """
    Create sets of parameter substitutions for the ``forwardPremium``
    input files and yield a unique identifier for each set.

    The "recipe" for building such sets is given in the ``para.loop``
    file pointed to by argument `path_to_para_loop`.  In more detail,
    `process_para_file` is a generator function, that:

     1. parses the `para.loop` file;
     2. computes the distinct sets of substitutions that should be
        applied to ``forwardPremium`` input files; one such set of
        substitutions corresponds to a single ``forwardPremium`` run;
     3. creates and yields a "unique name" string that shall identify
        *this* particular combination of parameters and input files.

    Each generator iteration yields a pair `(jobname, substitutions)`,
    where `jobname` is a unique string and `substitutions` is a
    dictionary mapping file names (as specified in the ``paraFile``
    column of the ``para.loop`` file) to lists of quadruples
    `(var, val, index, regex)`: into each input file, the variable
    `var` (corresponding to the group `index` in `regex`) is given
    value `val`.

    :param: `path_to_para_loop`: complete pathname of a file in
            ``para.loop`` format.

    :raise gc3libs.exceptions.InvalidUsage: if the variables of a group
            do not share one single restriction, or if a 'lowerTr'
            restriction is applied to a group with just one
            (non-switch-indicator) variable.
    """
    params = self._read_para(path_to_para_loop)
    num_params = len(params)
    self.logger.debug("Read %d parameters from file '%s'"
                      % (num_params, path_to_para_loop))

    variables = params['variables']
    indices = params['indices']
    paraFiles = params['paraFiles']
    paraProps = params['paraProps']
    groups = np.array(params['groups'], dtype = np.int16)
    groupRestrs = params['groupRestrs']
    paraFileRegex = params['paraFileRegex']
    vals = np.copy(params['vals'])

    # remap group numbers so that they start at 0 and increase in
    # steps of 1
    groups = self._remap_groups(groups)
    num_groups = len(np.unique(groups))

    self.logger.debug("Number of groups: %d" % num_groups)
    self.logger.debug('variables: %s' % variables)
    self.logger.debug('indices: %s' % indices)
    self.logger.debug('vals: %s' % vals)
    self.logger.debug('paraFiles: %s' % paraFiles)
    self.logger.debug('groups: %s' % groups)
    self.logger.debug('groupRestrs: %s' % groupRestrs)
    self.logger.debug('paraFileRegex: %s' % paraFileRegex)

    # check parameter files for consistency (once per distinct group)
    #
    # NOTE: the weird use of booleans as array indices is a
    # feature of NumPy, see:
    # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html
    for group in np.unique(groups):
        groupSelector = (groups == group)
        groupRestriction = np.unique(groupRestrs[groupSelector])
        nGroupVariables = sum(groupSelector)
        nSwIndicator = sum(paraProps[groupSelector] == 'swIndicator')
        nRelevGroupVariables = nGroupVariables - nSwIndicator
        if len(groupRestriction) != 1:
            raise gc3libs.exceptions.InvalidUsage(
                "Group restrictions '%s' are inconsistent for group '%s'"
                % (groupRestriction, group))
        elif nRelevGroupVariables == 1 and groupRestriction[0].lower() == 'lowertr':
            raise gc3libs.exceptions.InvalidUsage(
                "No sense in using 'lower triangular' restriction"
                " with just one variable.")

    # Set up groups
    groupBase = []
    groupIndices = []
    metaBase = []
    swVars = (paraProps == 'swIndicator')
    for ixGroup, group in enumerate(np.unique(groups[groups >= 0])):
        # ixGroup is used as index for groups
        self.logger.debug('At gpremium L200, ixGroup=%s' % ixGroup)
        self.logger.debug('At gpremium L201, group=%s'% group)
        groupBase.append([])
        # Select vars belonging to group 'group'. Leave out switch
        # indicator vars.
        groupSelector = np.logical_and(groups == group,
                                       np.logical_not(swVars))
        groupRestr = np.unique(groupRestrs[groupSelector])
        self.logger.debug('At gpremium L%d, groupRestr=%s' % (492, groupRestr))
        self.logger.debug('At gpremium L%d, groupSelector=%s' % (493, groupSelector))
        if len(groupRestr) != 1:
            raise gc3libs.exceptions.InvalidUsage(
                "Groups have different restrictions")
        for groupVals in vals[groupSelector]:
            values = str2vals(groupVals)
            # Index by position (ixGroup), consistently with the accesses
            # below, rather than by group label.
            groupBase[ixGroup].append(len(np.array(values)))
            self.logger.debug('At gpremium L%d, groupvals=%s' % (500, values))
        groupIndices.append(list(getIndex(groupBase[ixGroup], groupRestr)))
        self.logger.debug('At gpremium L%d, groupIndices=%s' % (503, groupIndices[ixGroup]))
        metaBase.append(len(groupIndices[ixGroup]))

    # Combine groups without restriction
    metaIndices = list(getIndex(metaBase, None))
    nMetaIndices = len(metaIndices)

    self.logger.debug('Summary after establishing groups:')
    self.logger.debug('  groupbase: %s' % groupBase)
    self.logger.debug('  groupindices: %s' % groupIndices)
    self.logger.debug('  metabase: %s' % metaBase)
    self.logger.debug('  metaind: %s' % metaIndices)

    self.logger.debug("Starting enumeration of independent runs...")
    for ixMeta in range(nMetaIndices):
        # Multiply by the float first: an int/int quotient truncates to
        # zero under Python 2 division.
        self.logger.debug("Loop iteration(ixMeta) %d of %d (%.2f%%)"
                          % (ixMeta+1, nMetaIndices,
                             100.0 * (1+ixMeta) / nMetaIndices))

        index = self.getFullIndex(ixMeta, metaIndices,
                                  groupIndices, groups, paraProps, vals)
        self.logger.debug("Index before flattening: %s" % index)
        index = list(flatten(index))
        self.logger.debug('Flattened index: %s' % index)

        for ixVar in range(0, len(variables)):
            self.logger.debug('variable #%d is %s' % (ixVar, variables[ixVar]))

        runDescription = 'para'
        substs = gc3libs.utils.defaultdict(list)
        for ixVar, var in enumerate(variables):
            self.logger.debug('variable: %s' % var)
            group = groups[ixVar]
            paraFile = paraFiles[ixVar]
            val = format_newVal(extractVal(ixVar, vals, index))
            regex = paraFileRegex[ixVar]
            paraIndex = str2tuple(indices[ixVar])
            self.logger.debug('paraIndex: %s' % paraIndex)
            substs[paraFile].append((var, val, paraIndex, regex))
            # Only grouped, non-switch-indicator variables show up in
            # the run name.
            if (group >= 0) and paraProps[ixVar] != 'swIndicator':
                runDescription += '_%s=%s' % (var, val)
        yield (runDescription, substs)
def process_para_file(self, path_to_para_loop):
    """
    Create sets of parameter substitutions for the ``forwardPremium``
    input files and yield a unique identifier for each set.

    The "recipe" for building such sets is given in the ``para.loop``
    file pointed to by argument `path_to_para_loop`.  In more detail,
    `process_para_file` is a generator function, that:

     1. parses the `para.loop` file;
     2. computes the distinct sets of substitutions that should be
        applied to ``forwardPremium`` input files; one such set of
        substitutions corresponds to a single ``forwardPremium`` run;
     3. creates and yields a "unique name" string that shall identify
        *this* particular combination of parameters and input files.

    Each generator iteration yields a pair `(jobname, substitutions)`,
    where `jobname` is a unique string and `substitutions` is a
    dictionary mapping file names (as specified in the ``paraFile``
    column of the ``para.loop`` file) to lists of quadruples
    `(var, val, index, regex)`: into each input file, the variable
    `var` (corresponding to the group `index` in `regex`) is given
    value `val`.

    :param: `path_to_para_loop`: complete pathname of a file in
            ``para.loop`` format.

    :raise gc3libs.exceptions.InvalidUsage: if the variables of a group
            do not share one single restriction, or if a 'lowerTr'
            restriction is applied to a group with just one
            (non-switch-indicator) variable.
    """
    params = self._read_para(path_to_para_loop)
    num_params = len(params)
    self.logger.debug("Read %d parameters from file '%s'" % (num_params, path_to_para_loop))

    variables = params["variables"]
    indices = params["indices"]
    paraFiles = params["paraFiles"]
    paraProps = params["paraProps"]
    groups = np.array(params["groups"], dtype=np.int16)
    groupRestrs = params["groupRestrs"]
    paraFileRegex = params["paraFileRegex"]
    vals = np.copy(params["vals"])

    # remap group numbers so that they start at 0 and increase in
    # steps of 1
    groups = self._remap_groups(groups)
    num_groups = len(np.unique(groups))

    self.logger.debug("Number of groups: %d" % num_groups)
    self.logger.debug("variables: %s" % variables)
    self.logger.debug("indices: %s" % indices)
    self.logger.debug("vals: %s" % vals)
    self.logger.debug("paraFiles: %s" % paraFiles)
    self.logger.debug("groups: %s" % groups)
    self.logger.debug("groupRestrs: %s" % groupRestrs)
    self.logger.debug("paraFileRegex: %s" % paraFileRegex)

    # check parameter files for consistency (once per distinct group)
    #
    # NOTE: the weird use of booleans as array indices is a
    # feature of NumPy, see:
    # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html
    for group in np.unique(groups):
        groupSelector = groups == group
        groupRestriction = np.unique(groupRestrs[groupSelector])
        nGroupVariables = sum(groupSelector)
        nSwIndicator = sum(paraProps[groupSelector] == "swIndicator")
        nRelevGroupVariables = nGroupVariables - nSwIndicator
        if len(groupRestriction) != 1:
            raise gc3libs.exceptions.InvalidUsage(
                "Group restrictions '%s' are inconsistent for group '%s'"
                % (groupRestriction, group)
            )
        elif nRelevGroupVariables == 1 and groupRestriction[0].lower() == "lowertr":
            raise gc3libs.exceptions.InvalidUsage(
                "No sense in using 'lower triangular' restriction"
                " with just one variable."
            )

    # Set up groups
    groupBase = []
    groupIndices = []
    metaBase = []
    swVars = paraProps == "swIndicator"
    for ixGroup, group in enumerate(np.unique(groups[groups >= 0])):
        # ixGroup is used as index for groups
        self.logger.debug("At gpremium L200, ixGroup=%s" % ixGroup)
        self.logger.debug("At gpremium L201, group=%s" % group)
        groupBase.append([])
        # Select vars belonging to group 'group'. Leave out switch
        # indicator vars.
        groupSelector = np.logical_and(groups == group, np.logical_not(swVars))
        groupRestr = np.unique(groupRestrs[groupSelector])
        self.logger.debug("At gpremium L%d, groupRestr=%s" % (492, groupRestr))
        self.logger.debug("At gpremium L%d, groupSelector=%s" % (493, groupSelector))
        if len(groupRestr) != 1:
            raise gc3libs.exceptions.InvalidUsage("Groups have different restrictions")
        for groupVals in vals[groupSelector]:
            values = str2vals(groupVals)
            # Index by position (ixGroup), consistently with the accesses
            # below, rather than by group label.
            groupBase[ixGroup].append(len(np.array(values)))
            self.logger.debug("At gpremium L%d, groupvals=%s" % (500, values))
        groupIndices.append(list(getIndex(groupBase[ixGroup], groupRestr)))
        self.logger.debug("At gpremium L%d, groupIndices=%s" % (503, groupIndices[ixGroup]))
        metaBase.append(len(groupIndices[ixGroup]))

    # Combine groups without restriction
    metaIndices = list(getIndex(metaBase, None))
    nMetaIndices = len(metaIndices)

    self.logger.debug("Summary after establishing groups:")
    self.logger.debug("  groupbase: %s" % groupBase)
    self.logger.debug("  groupindices: %s" % groupIndices)
    self.logger.debug("  metabase: %s" % metaBase)
    self.logger.debug("  metaind: %s" % metaIndices)

    self.logger.debug("Starting enumeration of independent runs...")
    for ixMeta in range(nMetaIndices):
        # Multiply by the float first: an int/int quotient truncates to
        # zero under Python 2 division.
        self.logger.debug(
            "Loop iteration(ixMeta) %d of %d (%.2f%%)"
            % (ixMeta + 1, nMetaIndices, 100.0 * (1 + ixMeta) / nMetaIndices)
        )

        index = self.getFullIndex(ixMeta, metaIndices, groupIndices, groups, paraProps, vals)
        self.logger.debug("Index before flattening: %s" % index)
        index = list(flatten(index))
        self.logger.debug("Flattened index: %s" % index)

        for ixVar in range(0, len(variables)):
            self.logger.debug("variable #%d is %s" % (ixVar, variables[ixVar]))

        runDescription = "para"
        substs = gc3libs.utils.defaultdict(list)
        for ixVar, var in enumerate(variables):
            self.logger.debug("variable: %s" % var)
            group = groups[ixVar]
            paraFile = paraFiles[ixVar]
            val = format_newVal(extractVal(ixVar, vals, index))
            regex = paraFileRegex[ixVar]
            paraIndex = str2tuple(indices[ixVar])
            self.logger.debug("paraIndex: %s" % paraIndex)
            substs[paraFile].append((var, val, paraIndex, regex))
            # Only grouped, non-switch-indicator variables show up in
            # the run name.
            if (group >= 0) and paraProps[ixVar] != "swIndicator":
                runDescription += "_%s=%s" % (var, val)
        yield (runDescription, substs)