def __init__(self, config, source):
    """Set up the job <-> parameter mapping: initialize, resync or reload it.

    Exactly one of three actions is performed:

    * init   - write the current mapping and parameter dump to the work
               directory (forced when either cache file is missing, or
               requested via the 'init' state and not declined by the user);
    * resync - diff the cached parameter dump against ``source``
               (requested via the 'resync' state, or triggered when the
               stored parameter hash differs from the current one);
    * load   - reuse the cached mapping unchanged.

    An init always takes precedence over a resync.

    config -- configuration object providing getWorkPath / getState / get
    source -- raw parameter source that is wrapped by this adapter
    """
    self._rawSource = source
    BasicParameterAdapter.__init__(self, config, source)
    self._mapJob2PID = {}
    self._pathJob2PID = config.getWorkPath('params.map.gz')
    self._pathParams = config.getWorkPath('params.dat.gz')

    # Find out if init should be performed - overrides userResync!
    userInit = config.getState('init', detail = 'parameters')
    # Init needed if no parameter log exists
    needInit = not (os.path.exists(self._pathParams) and os.path.exists(self._pathJob2PID))
    if userInit and not needInit and (source.getMaxParameters() is not None):
        # An existing mapping would be thrown away - offer a resync instead
        utils.eprint('Re-Initialization will overwrite the current mapping between jobs and parameter/dataset content! This can lead to invalid results!')
        if utils.getUserBool('Do you want to perform a syncronization between the current mapping and the new one to avoid this?', True):
            userInit = False
    doInit = userInit or needInit

    # Find out if resync should be performed
    userResync = config.getState('resync', detail = 'parameters')
    pHash = self._rawSource.getHash()
    self.storedHash = config.get('parameter hash', pHash, persistent = True)
    # Resync needed if parameters have changed
    needResync = self.storedHash != pHash
    doResync = (userResync or needResync) and not doInit

    # doResync implies "not doInit", so the three branches are mutually exclusive
    if doInit: # Write current state
        self.writeJob2PID(self._pathJob2PID)
        GCDumpParameterSource.write(self._pathParams, self)
    elif doResync: # Perform sync
        # NOTE(review): the reference is kept on purpose - presumably the
        # activity message lives as long as this object does; confirm.
        activity = utils.ActivityLog('Syncronizing parameter information')
        # Dropping the stored hash makes _resyncInternal see a hash change
        # (as long as the current hash is not None), so the diff is run.
        self.storedHash = None
        self._resyncInternal()
    else: # Reuse old mapping
        activity = utils.ActivityLog('Loading cached parameter information')
        self.readJob2PID()
def _resyncInternal(self): # This function is _VERY_ time critical!
    """Diff the cached parameter dump against the raw source and rebuild the job mapping.

    Steps:
      1. Ask the raw source for its own changes (redo, disable, sizeChange)
         and compare its hash with ``self.storedHash``. If nothing changed,
         ``self._resyncState`` is set to None and the method returns.
      2. Reduce the old (dumped) and the new parameter space to per-entry
         hashes over the tracked keys and diff them.
      3. Build a new job -> parameter map: unchanged entries keep their job
         number, added entries get job numbers following the old ones,
         missing entries are placed after the new entries and disabled.
      4. Store the resulting (redo, disable, sizeChange) tuple, translated to
         job numbers, in ``self._resyncState`` and write the new mapping and
         parameter dump to disk.

    Updates ``self.storedHash``, ``self._mapJob2PID``, ``self._resyncState``
    and - if entries went missing - ``self._source``.
    """
    changes = self._rawSource.resync() # First ask about psource changes
    (redo, disable, sizeChange) = (set(changes[0]), set(changes[1]), changes[2])
    hashNew = self._rawSource.getHash()
    hashChange = self.storedHash != hashNew
    self.storedHash = hashNew
    if not (redo or disable or sizeChange or hashChange):
        self._resyncState = None
        return

    def translatePSource(psource): # Reduces psource output to essential information for diff
        # Equality test (not truthiness) is kept so only keys whose
        # 'untracked' attribute compares equal to False are selected.
        keys_store = sorted([k for k in psource.getJobKeys() if k.untracked == False])
        def translateEntry(meta): # Translates parameter setting into hash
            digest = md5()
            for key in keys_store:
                if key in meta:
                    value = str(meta[key]) # convert once - this loop is hot
                    if value: # empty values do not contribute to the hash
                        digest.update(key)
                        digest.update(value)
            return { ParameterInfo.HASH: digest.hexdigest(), 'GC_PARAM': meta['GC_PARAM'],
                ParameterInfo.ACTIVE: meta[ParameterInfo.ACTIVE] }
        maxJobs = psource.getMaxJobs()
        if maxJobs is not None:
            for jobNum in range(maxJobs):
                yield translateEntry(psource.getJobInfo(jobNum))

    old = ParameterAdapter(None, GCDumpParameterSource(self._pathParams))
    params_old = list(translatePSource(old))
    new = ParameterAdapter(None, self._rawSource)
    params_new = list(translatePSource(new))

    mapJob2PID = {}
    # Callback for entries present in both OLD and NEW: records activation
    # changes and maps the old parameter number onto the new one.
    def sameParams(paramsAdded, paramsMissing, paramsSame, oldParam, newParam):
        if not oldParam[ParameterInfo.ACTIVE] and newParam[ParameterInfo.ACTIVE]:
            redo.add(newParam['GC_PARAM'])
        if oldParam[ParameterInfo.ACTIVE] and not newParam[ParameterInfo.ACTIVE]:
            disable.add(newParam['GC_PARAM'])
        mapJob2PID[oldParam['GC_PARAM']] = newParam['GC_PARAM']
    (pAdded, pMissing, pSame) = utils.DiffLists(params_old, params_new,
        lambda a, b: cmp(a[ParameterInfo.HASH], b[ParameterInfo.HASH]), sameParams)

    # Construct complete parameter space psource with missing parameter entries and intervention state
    # NNNNNNNNNNNNN OOOOOOOOO | source: NEW (==self) and OLD (==from file)
    # <same><added> <missing> | same: both in NEW and OLD, added: only in NEW, missing: only in OLD
    # NOTE(review): the arithmetic below assumes getMaxJobs() is not None for
    # both adapters whenever entries were added / went missing - confirm.
    oldMaxJobs = old.getMaxJobs()
    # assign sequential job numbers to the added parameter entries
    pAdded.sort(key = lambda x: x['GC_PARAM'])
    for (idx, meta) in enumerate(pAdded):
        if oldMaxJobs + idx != meta['GC_PARAM']:
            mapJob2PID[oldMaxJobs + idx] = meta['GC_PARAM']

    # Entries only present in OLD are placed after the NEW entries and disabled
    missingInfos = []
    newMaxJobs = new.getMaxJobs()
    pMissing.sort(key = lambda x: x['GC_PARAM'])
    for (idx, meta) in enumerate(pMissing):
        mapJob2PID[meta['GC_PARAM']] = newMaxJobs + idx
        info = old.getJobInfo(newMaxJobs + idx, meta['GC_PARAM'])
        info.pop('GC_PARAM')
        if info[ParameterInfo.ACTIVE]:
            info[ParameterInfo.ACTIVE] = False
            disable.add(newMaxJobs + idx)
        missingInfos.append(info)

    if missingInfos:
        from grid_control.parameters.psource_meta import ChainParameterSource
        from grid_control.parameters.psource_basic import InternalParameterSource
        currentInfoKeys = new.getJobKeys()
        # Keys that only the old parameter space knows about
        missingInfoKeys = [key for key in old.getJobKeys() if key not in currentInfoKeys]
        self._source = ChainParameterSource(self._rawSource, InternalParameterSource(missingInfos, missingInfoKeys))

    self._mapJob2PID = mapJob2PID # Update Job2PID map
    redo = redo.difference(disable) # disabling wins over redoing
    if redo or disable:
        # redo / disable hold parameter numbers - translate them to job numbers
        mapPID2Job = dict((pNum, jobNum) for (jobNum, pNum) in self._mapJob2PID.items())
        self._resyncState = (set(mapPID2Job.get(pNum, pNum) for pNum in redo),
            set(mapPID2Job.get(pNum, pNum) for pNum in disable), sizeChange)
    elif sizeChange:
        self._resyncState = (set(), set(), sizeChange)
    # NOTE(review): if only the hash changed and the diff found nothing,
    # self._resyncState is left untouched here - confirm this is intended.

    # Write resynced state - written under a temporary name first and then
    # renamed over the previous files
    self.writeJob2PID(self._pathJob2PID + '.old')
    GCDumpParameterSource.write(self._pathParams + '.old', self)
    os.rename(self._pathJob2PID + '.old', self._pathJob2PID)
    os.rename(self._pathParams + '.old', self._pathParams)