def msGentlyFormat(self, mysCmd, mydVals, myg, myCG):
    '''
    Like string.format() but does not raise an exception if the string
     contains a name request for which the dictionary does not have
     a value.  Leaves unfulfilled name requests in place.
    Method: construct a dictionary that contains something for every
     name requested in the string.  The value is either a supplied
     value from the caller or a placeholder for the name request.
     Then use the now-defanged string.format() method.
    This is way harder than it ought to be, grumble.
    '''
    # Make a dictionary from the names requested in the string
    #  that just replaces the request '{foo}' with itself.
    sReNames = r'\{([^\}]+)\}'
    oReNames = re.compile(sReNames)
    lNames = oReNames.findall(mysCmd)
    NTRC.ntracef(3, "FMT", "proc gently names|%s|" % (lNames))
    dNames = dict(zip(lNames, map(lambda s: "{" + s + "}", lNames)))
    # Pick up any specified values in the global object
    #  and from CLI args.
    dNames.update(dict(vars(myCG)))
    dNames.update(dict(vars(myg)))
    # And then add values from the specific instructions.
    dNames.update(mydVals)
    NTRC.ntracef(3, "FMT", "proc gently dnames|%s|" % (dNames))
    sOut = mysCmd.format(**dNames)
    return sOut
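# A minimal standalone sketch of the same partial-format technique,
#  free of the NTRC/CG machinery above (all names here are invented
#  for illustration): every '{name}' requested by the template gets a
#  dictionary entry that defaults to itself, so str.format() can never
#  raise KeyError for a missing value.
import re

def gently_format(sTemplate, dVals):
    lNames = re.findall(r'\{([^\}]+)\}', sTemplate)
    dNames = {s: "{" + s + "}" for s in lNames}     # placeholder = itself
    dNames.update(dVals)                            # caller's values win
    return sTemplate.format(**dNames)

# gently_format("run {nRun} of {nTotal}", {"nRun": 3})
#  -> 'run 3 of {nTotal}': the unfulfilled request survives intact.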
def fnbIsItDone(self, mysInstructionId): dIsItDone = {"sDoneId": mysInstructionId} dMaybeDone = self.oDoneCollection.find_one(dIsItDone) NTRC.ntracef( 3, "DB", "proc check donelist id|%s| list|%s|" % (mysInstructionId, dMaybeDone)) return isinstance(dMaybeDone, dict) # None if not found.
def fnnProcessAllInstructions(myitInstructionIterator): ''' Get the set of instructions that match the user's criteria for this batch, and run them one by one. Each instruction (run) is executed once for each random seed value. Count the number of runs, and don't exceed the user's limit, if any. If the execution reports a serious error, stop the loop. ''' nRunNumber = 0 maxcount = int(g.nTestLimit) # Is this a completely fake test run? Replace templates. if g.sTestFib.startswith("Y"): g.lTemplates = g.lFibTemplates # Process each instruction in turn. for dRawInstruction in myitInstructionIterator: NTRC.ntracef(3, "MAIN", "proc main raw instruction\n|%s|" % (dRawInstruction)) dInstruction = fndMaybeEnhanceInstruction(dRawInstruction) NTRC.ntracef(3, "MAIN", "proc main enhanced instruction\n|%s|" % (dInstruction)) # Execute each instruction many times, once for each random seed value. nRunNumber += 1 fnnProcessOneInstructionManyTimes(nRunNumber, dInstruction) # If user asked for a short test run today, maybe stop now. maxcount -= 1 if int(g.nTestLimit) > 0 and maxcount <= 0: break # That's all, folks. All instructions have been queued and will # eventually be processed. # Send the shutdown messages to worker processes. g.cWorkersInst.Close() return nRunNumber
def fnSendOneJobSlowly(myInstruction, myqJobs):
    '''
    Queue this instruction as a job.  If the queue size gets out
     of hand, wait for some jobs to be removed from it.
    '''
    # If qsize > hi threshold, wait for it to come down.
    """ Boy, the walrus operator would really come in handy here,
        but that would restrict us to Python versions >= 3.8.
    if (nQSize := myqJobs.qsize()) > g.nQThreshHi:
        NTRC.ntracef(3, "QTHR", "proc qsize over hi |%s|" % (nQSize))
        while (nQSize := myqJobs.qsize()) > g.nQThreshLo:
            time.sleep(g.nQThreshSleep)
        NTRC.ntracef(3, "QTHR", "proc qsize under lo |%s|" % (nQSize))
    """
    nQSize = myqJobs.qsize()
    if nQSize > g.nQThreshHi:
        NTRC.ntracef(3, "QTHR", "proc qsize over hi |%s|" % (nQSize))
        while True:
            time.sleep(g.nQThreshSleep)
            nQSize = myqJobs.qsize()
            if nQSize < g.nQThreshLo:
                break
        NTRC.ntracef(3, "QTHR", "proc qsize under lo |%s|" % (nQSize))
    # Okay, now queue the job.
    myqJobs.put(myInstruction)
    return nQSize
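# The hi/lo watermark idea above in isolation (a sketch; the threshold
#  and sleep values are invented, not the g.* settings): block the
#  producer when the queue tops nHi, and resume it only after the queue
#  drains below nLo, so the producer doesn't thrash at the boundary.
import queue
import time

def put_slowly(q, xItem, nHi=100, nLo=50, fSleepSec=0.05):
    if q.qsize() > nHi:
        while q.qsize() >= nLo:
            time.sleep(fSleepSec)
    q.put(xItem)

# q = queue.Queue(); put_slowly(q, "job")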
def mSelectServersForCollection(self, mynCollValue):
    '''\
    Get list of servers at this quality level.
    Return a random permutation of the list of servers.
    Oops, not any more.  Just a list of usable ones.
    '''
    # Get list of all servers at this quality level.
    # Value level translates to quality required and nr copies.
    (nQuality, nCopies) = G.dDistnParams[mynCollValue][0]
    lServersAtLevel = [ll[1] for ll in G.dQual2Servers[nQuality]]
    '''\
    For most questions, all servers are functionally identical.
    Just take the right number of them.  We used to take a random
    permutation of the list of servers and choose from those, hence
    the name "Perm", but don't waste the effort any more.
    NEW: return only servers that are not already in use and not broken.
    '''
    lPermChosenAlive = [svr for svr in lServersAtLevel
                        if not G.dID2Server[svr].bDead]
    lPermChosenAvail = [svr for svr in lPermChosenAlive
                        if not G.dID2Server[svr].bInUse]
    NTRC.ntracef(3, "CLI", "proc servers chosen level|%s| alive|%s| "
        "avail|%s|"
        % (lServersAtLevel, lPermChosenAlive, lPermChosenAvail))
    # Just make sure there are enough of them to meet the client's needs.
    if len(lPermChosenAlive) < nCopies:
        # Not enough servers available; someone will have to create one.
        lPermChosen = []
    else:
        lPermChosen = lPermChosenAvail[0:nCopies]
    return lPermChosen
def cmBeforeAudit(self): ''' Before each audit cycle, check to see if any servers have exceeded their lifetimes. ''' for (sServerID, cServer) in (util.fnttSortIDDict(G.dID2Server)): fCurrentLife = cServer.mfGetMyCurrentLife() fFullLife = cServer.mfGetMyFullLife() fBirthday = cServer.mfGetMyBirthday() bServerAlive = not cServer.mbIsServerDead() bServerActive = cServer.bInUse # Log that we are examining this server, # but note if it's already dead. sStatus = "inuse" if bServerActive else "" sStatus = sStatus if bServerAlive else "dead" lg.logInfo("SHOCK ", "t|%6.0f| audit+end check svr|%s| " "life|%.0f|=|%.1f|yr %s" % (G.env.now, sServerID, fFullLife, fFullLife/10000, sStatus)) NTRC.ntracef(3, "SHOK", "proc t|%6.0f| check expir? svr|%s| " "svrdefaulthalflife|%s| born|%s| currlife|%s|" % (G.env.now, sServerID, G.fServerDefaultHalflife, fBirthday, fCurrentLife)) # Check to see if the server's lifetime has expired. bDeadAlready = CShock.cmbShouldServerDieNow(sServerID) return G.nDeadOldServers
def fnnProcessAllInstructions(myitInstructionIterator):
    '''
    Get the set of instructions that match the user's criteria for this
     batch, and run them one by one.  Each instruction (run) is executed
     once for each random seed value.
    Count the number of runs, and don't exceed the user's limit, if any.
    If the execution reports a serious error, stop the loop.
    '''
    nRunNumber = 0
    maxcount = int(g.nTestLimit)
    # Is this a completely fake test run?  Replace templates.
    if g.sTestFib.startswith("Y"):
        g.lTemplates = g.lFibTemplates
    # Process each instruction in turn.
    for dRawInstruction in myitInstructionIterator:
        NTRC.ntracef(3, "MAIN", "proc main raw instruction\n|%s|"
            % (dRawInstruction))
        dInstruction = fndMaybeEnhanceInstruction(dRawInstruction)
        NTRC.ntracef(3, "MAIN", "proc main enhanced instruction\n|%s|"
            % (dInstruction))
        # Execute each instruction once for each random seed value.
        nRunNumber += 1
        lManyInstr = fnltProcessOneInstructionManyTimes(nRunNumber
                        , dInstruction)
        g.lGiantInstr.extend(lManyInstr)
        # If user asked for a short test run today, maybe stop now.
        maxcount -= 1
        if int(g.nTestLimit) > 0 and maxcount <= 0:
            break
    return nRunNumber
def fnlsDoOneCmdList(mylLines):
    """Input: list of instructions generated for this case.

    Remove blanks, comments, etc., from the instructions.  Each line
    that is not blank or comment is a command to be executed.  Blanks
    and comments are written directly into the output.

    Output: list of commands and their output, to be logged and sent
    to an output queue by the caller.
    """
    # Process all command lines of the instruction list and collect results.
    lResults = []               # list of strings
    for nLine, sLine in enumerate(mylLines):
        if fnbDoNotIgnoreLine(sLine):
            # Genuine line; execute and collect answer line(s).
            tAnswer = fntDoOneCmdLine(sLine)
            (nRtn, nErr, lResult) = (tAnswer.callstatus
                                    , tAnswer.cmdstatus
                                    , tAnswer.ltext)
            lResults.extend(lResult)
            NTRC.ntracef(4, "DOCL", "proc DoOneList line|%s| "
                "lResult|%s|" % (nLine, lResult))
        else:
            # Comment or blank line; just append to results.
            lResults.extend([("-"*len(fnsGetTimestamp()))
                            , (fnsGetTimestamp() + "  " + sLine)])
            NTRC.ntracef(4, "DOCL", "proc DoOneCase line|%s| "
                "comment|%s|" % (nLine, sLine))
    return lResults
def mEvaluateMe(self):
    '''\
    Return tuple of four bools stating doc status.
    How many copies do I have left (if any)?
    '''
    # Py3: filter() returns an iterator, which has no len();
    #  count the surviving copies with a comprehension instead.
    nCopiesLeft = len([sServerID for sServerID in self.lServerIDs
                       if self.mTestOneServer(sServerID)])
    # Are there any or enough copies left from which to repair the doc?
    nNumberOfServers = len(self.setServerIDsAll)
    nMajorityOfServers = (nNumberOfServers + 1) / 2
    # Include results from previous audits (if any).
    (bOkay, bMajority, bMinority, bLost) = (self.bDocumentOkay,
        self.bMajorityRepair, self.bMinorityRepair, self.bDocumentLost)
    NTRC.ntracef(3, "DOC", "proc mEvaluateMe doc|%s| ncopies|%s| "
        "nservers|%s| okay|%s| majority|%s| minority|%s| lost|%s|"
        % (self.ID, nCopiesLeft, nNumberOfServers, bOkay, bMajority,
        bMinority, bLost))
    if nCopiesLeft > 0:
        # If there is a majority of copies remaining,
        #  then unambiguous repair is possible.
        if (nCopiesLeft < nNumberOfServers
                and nCopiesLeft >= nMajorityOfServers):
            bMajority = True
            bOkay = False
        # Some copies left, but not enough for unambiguous repair.
        #  Record that forensics are required for this doc repair.
        elif nCopiesLeft < nMajorityOfServers:
            bMinority = True
            bOkay = False
    else:
        # There are no remaining copies of the doc;
        #  it cannot be repaired ever, oops.  Permanent loss.
        bLost = True
        bOkay = False
    return (bOkay, bMajority, bMinority, bLost)
def mServerIsDead(self, mysServerID, mysCollID): '''\ Auditor calls us: a server is dead, no longer accepting documents. Remove server from active list, find a new server, populate it. ''' NTRC.ntracef(3, "CLI", "proc deadserver1 client|%s| place coll|%s| " "to|%d|servers" % (self.ID, mysCollID, len(self.lServersToUse))) lg.logInfo("CLIENT", "server died cli|%s| removed svr|%s| coll|%s| " % (self.ID, mysServerID, mysCollID)) cColl = G.dID2Collection[mysCollID] cColl.lServerIDs.remove(mysServerID) nCollValue = cColl.nValue lServersForCollection = self.mSelectServersForCollection(nCollValue) # The distribution params have already limited the # set of servers in the select-for-collection routine. # If there are servers available, pick one. Otherwise, # create a new server that's just like an old one and use it. if lServersForCollection: sServerToUse = lServersForCollection.pop(0) else: sServerToUse = CServer.fnsInventNewServer() lg.logInfo("CLIENT", "client|%s| assign new server|%s| to replace|%s|" % (self.ID, sServerToUse, mysServerID)) nDocs = self.mPlaceCollectionOnServer(mysCollID, sServerToUse) lg.logInfo("CLIENT", "client|%s| provisioned new server|%s| " "collection|%s| ndocs|%s|" % (self.ID, sServerToUse, mysCollID, nDocs)) self.nServerReplacements += 1 return sServerToUse
def fnDoOneJob(mytInstruction): ''' Execute a single job: Do all lines. Log results and convey to output queue. ''' # Get my name and number for ident. sWhoami = mp.current_process().name NTRC.ntracef(3, "DO1J", "proc procname|%s|" % (sWhoami)) nProc = fnsGetProcessNumber(sWhoami) # Unpack instruction command list and other items. lInstructions = mytInstruction.cmdlist (sLogfileDir, sLogfileName) = (mytInstruction.logdir , mytInstruction.logname) qToUse = CWorkers.getOutputQueue() lResults = fnlsDoOneCmdList(lInstructions) # Send official results to the log file. fnWriteLogFile((lResults), sLogfileDir, sLogfileName) # If an output queue specified, pack up the answer and send it. if qToUse: # And send a copy of results to the specified output queue. lPrefix = [("BEGIN results from " + sWhoami)] lSuffix = [("ENDOF results from " + sWhoami)] lResultsToSee = ['\n'] + lPrefix + lResults + lSuffix + ['\n'] tAnswers = tLinesOut(procname=sWhoami, listoflists=lResultsToSee) qToUse.put(tAnswers)
def __init__(self, mygl, mynWaitMsec): threading.Thread.__init__(self, name="endall") self.gl = mygl self.nWaitMsec = mynWaitMsec self.llsFullOutput = list() NTRC.ntracef(2, "END", "exit init gl|%s| wait|%s|" % (self.gl, self.nWaitMsec))
def fntRunEverything(mygl, qInstr, fnbQEnd, nWaitMsec, nWaitHowMany):
    '''Start an async job for each case.  Limit number of concurrent
        jobs to the size of the ltJobs vector.  When a job completes,
        ship its output upline and remove it from the active lists.

    Two separate threads:
    - Wait for an empty slot; get an instruction, start an async job.
    - Wait for an active job to complete and remove it from lists.
    '''
    # Fill the list of jobs with empties.
    for i in range(mygl.nParallel + 1):
        mygl.ltJobs.append(None)
    mygl.lockJobList = threading.Lock()
    mygl.lockPrint = threading.Lock()
    # Create and start new threads.
    NTRC.ntracef(5, "RUN", "proc make thread instances")
    mygl.thrStart = CStartAllCases(mygl, mygl.nCoreTimer, mygl.nStuckLimit
                    , qInstr, fnbQEnd)
    mygl.thrEnd = CEndAllCases(mygl, mygl.nCoreTimer)
    mygl.llsFullOutput = [["", ""]]
    # The joins below do nothing unless both threads are started.
    mygl.thrStart.start()
    mygl.thrEnd.start()
    # Wait until all jobs have started and finished.
    if (mygl.thrStart.is_alive() and mygl.thrEnd.is_alive()):
        mygl.thrStart.join()        # Runs out of instructions.
        mygl.thrEnd.join()          # Runs out of finished jobs.
    return tWaitStats(ncases=mygl.nCasesDone
                    , slot=mygl.nWaitedForSlot
                    , done=mygl.nWaitedForDone
                    , inst=mygl.nWaitedForInstr)
def __init__(self, size, mysClientID, mysCollectionID): self.ID = "D" + str(self.getID()) # BEWARE: if we have more than 10,000 docs, a fixed-length # representation will have to change. Bad idea; don't use it. # Change the sorting algorithm instead. # self.ID = "D" + "%04d"%(self.getID()) # So, don't use it. G.dID2Document[self.ID] = self G.nDocLastID = self.ID self.nSize = size # Who owns this doc self.sClientID = mysClientID # Doc owned by what client self.sCollID = mysCollectionID # Doc lives in what collection NTRC.ntracef( 3, "DOC", "proc init client|%s| created doc|%s| size|%d|" % (self.sClientID, self.ID, self.nSize)) # Where are copies of this doc stored self.lServerIDs = list() # What servers currently have this doc self.lCopyIDs = list() # What copy IDs are there of this doc self.setServerIDsAll = set([]) # What servers have ever had a copy # How has the doc fared in the storage wars self.bMajorityRepair = False # True if ever repaired from majority of copies self.bMinorityRepair = False # True if ever repaired from minority of copies self.bDocumentLost = False # True if completely lost, all copies lost self.bDocumentOkay = True # True if never repaired or lost self.nRepairsMajority = 0 # Number of repairs of doc from majority copies self.nRepairsMinority = 0 # Number of repairs of doc from minority copies
def fndFormatQuery(self, mydCli, myg):
    '''
    Take all the CLI options that might specify a searchable attribute,
     and construct a MongoDB or searchspace query dictionary.
     This is lots nastier than it first appears to be
     because json is so bloody picky.
    '''
    dOut = dict()
    for sAttrib, sValue in mydCli.items():
        result = None
        if sValue is not None:
            # Is it something valid in json?
            try:
                result = json.loads(sValue)
            except ValueError:
                # Is it a string that should be an integer, ok in json?
                try:
                    result = int(sValue)
                except (ValueError, TypeError):
                    # Is it a naked string for some string-valued var
                    #  that isn't just Y/N or a mandatory string?
                    # Rule out dict values that are already formatted.
                    if (isinstance(sValue, str)
                            and sAttrib not in myg.lYesNoOptions
                            and sAttrib not in myg.lMandatoryArgs
                            and '{' not in sValue
                            and '}' not in sValue
                            and ':' not in sValue
                            and ',' not in sValue):
                        result = '{"$eq":' + '"' + sValue + '"' + '}'
                    else:
                        result = sValue
                    NTRC.tracef(
                        3, "FMT", "proc FormatQuery notjson item "
                        "key|%s| val|%s| result|%s|"
                        % (sAttrib, sValue, result))
        NTRC.tracef(
            3, "FMT",
            "proc FormatQuery item key|%s| val|%s| result|%s|"
            % (sAttrib, sValue, result))
        # Can't process dicts thru json twice.
        if isinstance(result, dict):
            dOut[sAttrib] = sValue
        else:
            dOut[sAttrib] = result
    # Allow only attribs that appear in the database, else will get
    #  no results due to implied AND of all items in query dict.
    dOutSafe = {k: v for k, v in dOut.items() if k in myg.lSearchables}
    dOutNotNone = {k: v for k, v in dOutSafe.items() if v is not None}
    NTRC.ntracef(
        3, "FMT", "proc dict b4|%s| \nsafe|%s|\nclean|%s|"
        % (dOut, dOutSafe, dOutNotNone))
    if "sQuery" in dOutNotNone.keys():
        # If the brave user has supplied a full, standalone query string,
        #  add its contents to the query dict so far.
        dTmp = dOutNotNone["sQuery"]
        del dOutNotNone["sQuery"]
        dOutNotNone.update(dTmp)
    return dOutNotNone
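# The value-classification ladder above, restated as a tiny pure
#  function (a sketch under the same try-json, then-int, then-wrap
#  order; names invented):
import json

def classify(sValue):
    try:
        return json.loads(sValue)   # e.g. '3' -> 3, '{"$gt":5}' -> dict
    except ValueError:
        try:
            return int(sValue)      # e.g. '03' -> 3 (json rejects it)
        except (ValueError, TypeError):
            return '{"$eq":"%s"}' % sValue   # naked string -> $eq clause

# classify('middle') -> '{"$eq":"middle"}'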
def mScheduleGlitch(self): '''Wait for a glitch lifetime on this shelf. If the shelf died as a result of the glitch, stop rescheduling. ''' fNow = G.env.now NTRC.tracef( 3, "LIFE", "proc schedule glitch t|%d| shelf|%s| alive|%s|" % (fNow, self.sShelfID, self.cShelf.mbIsShelfAlive())) while 1: fNow = G.env.now bAlive = self.cShelf.mbIsShelfAlive() if bAlive: self.fShelfLife = self.mfCalcCurrentGlitchLifetime(fNow) if self.fShelfLife > 0 and bAlive: self.fShelfInterval = util.makeexpo(self.fShelfLife) lg.logInfo( "LIFETIME", "schedule t|%6.0f| for shelf|%s| " "interval|%.3f| freq|%d| life|%.3f|" % (fNow, self.sShelfID, self.fShelfInterval, self.nGlitchFreq, self.fShelfLife)) NTRC.tracef( 3, "LIFE", "proc schedule glitch shelf|%s| " "interval|%.3f| based on life|%.3f| alive|%s| " "waiting..." % (self.sShelfID, self.fShelfInterval, self.fShelfLife, bAlive)) yield G.env.timeout(self.fShelfInterval) # ****** Glitch has now occurred. ****** # If correlated failure, step entirely outside the # Lifetime-Shelf-Server context to signal several servers. if self.nGlitchSpan > 1: from server import CServer CServer.fnCorrFailHappensToAll(self.nGlitchSpan) else: self.mGlitchHappensNow() else: NTRC.ntracef( 3, "LIFE", "proc glitch no freq or not alive, " "set wait to infinity shelf|%s| freq|%d| life|%.3f| " "interval|%.3f|" % (self.sShelfID, self.nGlitchFreq, self.fShelfLife, self.fShelfInterval)) yield G.env.timeout(G.fInfinity) else: break # Because we have to use fako "while 1". # When shelf is not alive anymore, wait forever NTRC.ntracef( 3, "LIFE", "proc glitch shelf no longer alive, set wait " "to infinity shelf|%s| freq|%d| life|%.3f| interval|%.3f|" % (self.sShelfID, self.nGlitchFreq, self.fShelfLife, self.fShelfInterval)) yield G.env.timeout(G.fInfinity)
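# util.makeexpo above presumably draws an exponential variate with the
#  given mean (an assumption inferred from its use here); the stdlib
#  equivalent would be:
import random

def makeexpo(fMean):
    # random.expovariate takes a rate (1/mean), not a mean.
    return random.expovariate(1.0 / fMean)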
def fndgGetSearchSpace(mysDir, mysTyp, mydUserRuleDict): ''' Produce instruction stream from instruction files and user rules. ''' dFullDict = fndReadAllInsFiles(mysDir, mysTyp) (dTrimmedDict, dOriginalDict) = fntProcessAllUserRules(mydUserRuleDict, dFullDict) dFilteredDict = fndFilterResults(dTrimmedDict) fnvTestResults(dFilteredDict, dFullDict) NTRC.ntracef(3, "SRCH", "proc GetSearchSpace:FilteredDict|%s|" % (dFilteredDict)) return fndgCombineResults(dFilteredDict)
def fnvGetEnvironmentOverrides(): # Allow user to override number of cores to use today. # Utility routine looks at HW and possible user envir override. g.nCores = brokergetcores.fnnGetResolvedCores() NTRC.ntracef(0, "MAIN", "proc ncores|%s|" % (g.nCores)) # Allow user to override the polite interval to use today. try: g.nPoliteTimer = int(os.getenv("NPOLITE", CG.nPoliteTimer)) NTRC.ntracef(0, "MAIN", "proc politetimer|%s|msec" % (g.nPoliteTimer)) except (ValueError, TypeError): raise TypeError("Environment variable NPOLITE must be " "an integer number of milliseconds.")
def __init__(self, mygl , mynWaitMsec, mynWaitHowMany , myqInstructions, myfnbEnd ): threading.Thread.__init__(self, name="startall") self.gl = mygl self.nWaitMsec = mynWaitMsec self.nWaitHowMany = mynWaitHowMany self.nCounter = itertools.count(1) self.nProcess = 0 self.qInstructions = myqInstructions self.fnbEnd = myfnbEnd NTRC.ntracef(2, "STRT", "exit init gl|%s| instrs|%s|" % (self.gl, self.qInstructions))
def fnvGetEnvironmentOverrides(): # Allow user to override number of cores to use today. # Utility routine looks at HW and possible user envir override. g.nCores = brokergetcores.fnnGetResolvedCores() NTRC.ntracef(0, "MAIN", "proc ncores|%s|" % (g.nCores)) g.nParallel = g.nCores # Sorry for the name change. # Allow user to override the polite interval to use today. try: g.nPoliteTimer = int(os.getenv("NPOLITE", CG.nPoliteTimer)) g.nCoreTimer = g.nPoliteTimer # Sorry for the name change. NTRC.ntracef(0, "MAIN", "proc politetimer|%s|msec" % (g.nPoliteTimer)) except (ValueError, TypeError): raise TypeError("Environment variable NPOLITE must be " "an integer number of milliseconds.")
def mGlitchHappensNow(self): """Start a glitch happening right now. May be invoked from outside a CLifetime instance as well as from inside.""" fNow = G.env.now NTRC.ntracef( 3, "LIFE", "proc glitch wait expired t|%6.0f| " "for shelf|%s| freq|%d| life|%.3f| interval|%.3f|" % (fNow, self.sShelfID, self.nGlitchFreq, self.fShelfLife, self.fShelfInterval)) self.mGlitchHappens(fNow) lg.logInfo( "LIFETIME", "glitchnow t|%6.0f| for shelf|%s| active|%s|" % (fNow, self.sShelfID, self.bGlitchActive))
def doManyJobs(myqJobs):
    '''
    This is the guy who gets called as a job worker.
    Read a job from the input queue.
    If it is a real instruction, do it.
    If it is an end code, exit.
    '''
    while True:
        tInstructionJob = myqJobs.get()
        sWhoami = mp.current_process().name
        NTRC.ntracef(3, "DOMJ", "proc DoManyJobs|%s| qget|%s|"
            % (sWhoami, tInstructionJob,))
        if tInstructionJob.cmdlist:
            result = fnDoOneJob(tInstructionJob)
        else:
            sys.exit(0)
def defaultReceiveOutput(myqOutput): while True: tAnswers = myqOutput.get() sWhoami = mp.current_process().name NTRC.ntracef( 3, "RCVO", "proc DefRcvOut|%s| got output |%s|" % (sWhoami, repr(tAnswers))) lOutput = tAnswers.listoflists if lOutput: # Print it all on stdout. for sLine in lOutput: print(sLine) print("--------------") else: sys.exit(0)
def fnnCalcDocSize(mynLevel): lPercents = G.dDocParams[mynLevel] nPctRandom = makeunif(0, 100) nPctCum = 0 for lTriple in lPercents: (nPercent, nMean, nSdev) = lTriple nPctCum += nPercent if nPctRandom <= nPctCum: nDocSize = int(makennnorm(nMean, nSdev)) NTRC.ntracef( 3, "DOC", "proc CalcDocSize rand|%s| cum|%s| pct|%s| " "mean|%s| sd|%s| siz|%s|" % (nPctRandom, nPctCum, nPercent, nMean, nSdev, nDocSize)) break return nDocSize
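# Self-contained sketch of the same mixture-distribution sampling: pick
#  a (percent, mean, sdev) component by cumulative percentage, then draw
#  from it.  random.uniform/random.gauss stand in for the project's
#  makeunif/makennnorm wrappers (an assumption); the final fallback is
#  added here in case the percents sum to less than 100, which would
#  leave nDocSize unset in the loop above.
import random

def calc_doc_size(lPercents):
    nPctRandom = random.uniform(0, 100)
    nPctCum = 0
    for (nPercent, nMean, nSdev) in lPercents:
        nPctCum += nPercent
        if nPctRandom <= nPctCum:
            return max(0, int(random.gauss(nMean, nSdev)))
    return int(lPercents[-1][1])    # fallback: mean of last component

# calc_doc_size([(50, 5, 1), (50, 5000, 500)]) -- half small, half large.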
def mDestroyShelf(self):
    '''
    Nuke all the copies on the shelf.
    Can't delete the CShelf object, however.
    '''
    NTRC.ntracef(3, "SHLF", "proc mDestroyShelf1 shelf|%s| "
        "has ncopies|%s|" % (self.ID, len(self.lCopyIDs)))
    lg.logInfo("SHELF ", "t|%6.0f| destroy shelf|%s| "
        "of svr|%s| ncopies|%s|"
        % (G.env.now, self.ID, self.sServerID, len(self.lCopyIDs)))
    lAllCopyIDs = self.lCopyIDs[:]  # DANGER: the list is modified inside
                                    #  the loop, so iterate over a copy.
    for sCopyID in lAllCopyIDs:
        self.mDestroyCopy(sCopyID)
def fndgCombineResults(mydInstructions):
    '''
    Expand the cross product of remaining instruction values.
    '''
    lKeyNames = [k for k in mydInstructions.keys()]
    for lInstruction in itertools.product(
            *[mydInstructions[sKey] for sKey in lKeyNames]):
        dInstruction = dict(zip(lKeyNames, lInstruction))
        # Add unique id, as Mongo does, so we can find jobs already done.
        ### dInstruction["_id"] = hashlib.sha1(str(dInstruction)).hexdigest()
        dInstruction["_id"] = (hashlib.sha1(
            str(dInstruction).encode('ascii')).hexdigest())
        NTRC.ntracef(3, "SRCH", "proc CombineResults:dInstruction|%s|"
            % (dInstruction))
        yield dInstruction
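# Tiny illustration of the cross-product expansion with a stable _id
#  (invented values): two keys with two values each yield four cases.
import hashlib
import itertools

dIns = {"nCopies": [3, 4], "nLifem": [10, 100]}
for tVals in itertools.product(*dIns.values()):
    dCase = dict(zip(dIns.keys(), tVals))
    dCase["_id"] = hashlib.sha1(str(dCase).encode('ascii')).hexdigest()
    # -> e.g. {'nCopies': 3, 'nLifem': 10, '_id': '...'}, four in all.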
def mServerDies(self): if not self.bDead: self.bDead = True # Destroy all doc ids so that audit will not find any. # TODO: #mark all documents as injured NTRC.ntracef( 3, "SERV", "proc mServerDies kill ndocs|%s|" % (len(self.lDocIDs))) # self.lDocIDs = list() # self.dDocIDs = dict() # Shall we destroy all the shelves, too, or will that also # cause a problem? for sShelfID in self.lShelfIDs: G.dID2Shelf[sShelfID].mDestroyShelf() # TODO: #mark all shelves as not bAlive pass
def defaultReceiveOutput(myqOutput): sWhoami = mp.current_process().name nWorkOutput = 0 while True: tAnswers = myqOutput.get() nWorkOutput += 1 NTRC.ntracef(3, "RCVO", "proc DefRcvOut|%s| got output |%s| ncase|%s|" % (sWhoami, repr(tAnswers), nWorkOutput)) lOutput = tAnswers.listoflists if lOutput: # Print it all on stdout. print(f'--------------- case {nWorkOutput}') for sLine in lOutput: print(sLine) print(f'-------- end of case {nWorkOutput}') else: sys.exit(0)
def mMergeEvaluation(self, mybOkay, mybMajority, mybMinority, mybLost): '''\ Carefully combine new doc info with old from audits, if any. E.g., finally okay only if was okay and still is okay; finally lost if was lost or is now lost. ''' NTRC.ntracef( 3, "DOC", "proc merge in|%s|%s|%s|%s| with doc|%s|%s|%s|%s|" % (mybOkay, mybMajority, mybMinority, mybLost, self.bDocumentOkay, self.bMajorityRepair, self.bMinorityRepair, self.bDocumentLost)) self.bDocumentOkay = self.bDocumentOkay and mybOkay self.bMajorityRepair = self.bMajorityRepair or mybMajority self.bMinorityRepair = self.bMinorityRepair or mybMinority self.bDocumentLost = self.bDocumentLost or mybLost return (self.bDocumentOkay, self.bMajorityRepair, self.bMinorityRepair, self.bDocumentLost)
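# The merge is monotone: bOkay can only ever go False, and the three
#  damage flags can only ever go True.  Restated on bare tuples
#  (illustration only):
def merge(tOld, tNew):
    # Each tuple is (okay, majority, minority, lost), as above.
    return (tOld[0] and tNew[0], tOld[1] or tNew[1],
            tOld[2] or tNew[2], tOld[3] or tNew[3])

assert (merge((True, False, False, False), (False, True, False, False))
        == (False, True, False, False))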
def cmbShouldServerDieNow(self, mysServerID): ''' If the server's (possibly reduced) lifetime has expired, kill it rather than restoring it to a full life. ''' cServer = G.dID2Server[mysServerID] fCurrentLife = cServer.mfGetMyCurrentLife() fFullLife = cServer.mfGetMyFullLife() fBirthday = cServer.mfGetMyBirthday() bServerAlive = not cServer.mbIsServerDead() if (G.fServerDefaultHalflife > 0 and fCurrentLife > 0 and fFullLife <= G.env.now and bServerAlive ): # Server has overstayed its welcome. Kill it. sInUse = "currently in use" if cServer.mbIsServerInUse() else "" sShockVictim = "shock victim" if cServer.mbIsServerInShock() else "" lg.logInfo("SHOCK ", "t|%6.0f| kill svr|%s| " "born|%.0f| life|%.0f|=|%.1f|yr " "expired %s %s" % (G.env.now, mysServerID, fBirthday, fCurrentLife, fCurrentLife/10000, sInUse, sShockVictim)) NTRC.ntracef(3, "SHOK", "proc t|%6.0f| expired svr|%s| " "svrdefaulthalflife|%s| born|%.0f| currlife|%.0f|" % (G.env.now, mysServerID, G.fServerDefaultHalflife, fBirthday, fCurrentLife)) result = cServer.mKillServer() G.nDeadOldServers += 1 bResult = True # Now check to see if the server died because of the shock. # Is the current life less than the original life? # Philosophical question: if the shock type 2 caused your new, # recalculated life to be longer than your original life, # can your death reasonably be attributed to the shock? # Answer = no, because without the shock you would have # died even earlier. Tricky, though. fOriginalLife = cServer.mfGetMyOriginalLife() if fCurrentLife < fOriginalLife: G.nDeathsDueToShock += 1 G.lDeathsDueToShock.append(mysServerID) else: bResult = False return bResult
def makeServers(mydServers):
    for sServerName in mydServers:
        (nServerQual, nShelfSize) = mydServers[sServerName][0]
        cServer = server.CServer(sServerName, nServerQual, nShelfSize)
        sServerID = cServer.ID
        G.lAllServers.append(cServer)
        fCurrentLife = cServer.mfGetMyCurrentLife()
        lg.logInfo("MAIN", "created server|%s| quality|%s| shelfsize|%s|TB "
            "name|%s| life|%.0f|"
            % (sServerID, nServerQual, nShelfSize, sServerName,
            fCurrentLife))
        # Invert the server list so that clients can look up
        #  all the servers that satisfy a quality criterion.
        if nServerQual in G.dQual2Servers:
            G.dQual2Servers[nServerQual].append([sServerName, sServerID])
        else:
            G.dQual2Servers[nServerQual] = [[sServerName, sServerID]]
        NTRC.ntracef(5, "SVRS", "proc makeServers dQual2Servers qual|%s| "
            "servers|%s|"
            % (nServerQual, G.dQual2Servers[nServerQual]))
    return G.dQual2Servers
def mTestClient(self): '''\ Return list, maybe empty, of all documents missing from this client. All collections appended together. ''' lDeadDocIDs = list() for sCollID in self.lCollectionIDs: cColl = G.dID2Collection[sCollID] lResult = cColl.mTestCollection() NTRC.ntracef(3, "CLI", "proc TestClient1 client|%s| " "tests coll|%s| result|%s|" % (self.ID, sCollID, lResult)) if len(lResult) > 0: lDeadDocIDs.extend(lResult) NTRC.ntracef(3, "CLI", "proc TestClient2 client |%s| " "coll|%s| lost docs|%s|" % (self.ID, sCollID, lResult)) return lDeadDocIDs
def fnoOpenDb(mysDbFilename):
    '''
    If the db file exists, read it.
    If not, write an empty copy.
    '''
    global sDbName
    sDbName = mysDbFilename
    # First, make sure there is a place to put our file.
    sDirName = os.path.dirname(mysDbFilename)
    if not os.path.isdir(sDirName):
        os.mkdir(sDirName)
    if os.path.isfile(mysDbFilename) and os.path.getsize(mysDbFilename) > 0:
        NTRC.ntracef(3, "SRLB", "proc open json for read|%s|"
            % (mysDbFilename))
        """
        with filelock.FileLock(mysDbFilename):
            # File present, try to read it as json.
            fh = open(mysDbFilename, "r")
            sDbContent = "".join(fh.readlines())
            NTRC.ntracef(3, "SRLB", "proc file content|%s|" % (sDbContent))
            try:
                dDb = json.loads(sDbContent)
            except ValueError:
                raise ValueError(("Error: file|%s| is not valid JSON"
                    % (mysDbFilename)))
        """
        with filelock.FileLock(mysDbFilename):
            dDb = fndReadRetryLock(mysDbFilename)
    else:
        # File not there yet, write it.
        try:
            NTRC.ntracef(3, "SRLB", "proc open json for write|%s|"
                % (mysDbFilename))
            with filelock.FileLock(mysDbFilename):
                # Json is text; open the new file in text mode, not binary.
                with open(mysDbFilename, "w") as fh:
                    dDb = copy.deepcopy(dDbEmpty)
                    json.dump(dDb, fh)
        except IOError:
            raise IOError(("Error: cannot create new json file|%s|"
                % (mysDbFilename)))
    return dDb
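# The read-or-initialize idiom above in minimal form.  A sketch only:
#  the separate ".lock" suffix follows the filelock library's documented
#  usage and differs from the code above, which locks under the data
#  file's own name.
import json
import os
import filelock

def load_or_init(sPath, dDefault):
    with filelock.FileLock(sPath + ".lock"):
        if os.path.isfile(sPath) and os.path.getsize(sPath) > 0:
            with open(sPath, "r") as fh:
                return json.load(fh)
        with open(sPath, "w") as fh:
            json.dump(dDefault, fh)
        return dict(dDefault)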
def mAddDocument(self, mysDocID, mysClientID): ''' Find a shelf with room for the doc, or create one. Put the doc on the shelf, decrement the remaining space. ''' # If the server is already dead, do not accept any documents. if not self.bDead: cDoc = G.dID2Document[mysDocID] nSize = cDoc.nSize # Find a shelf with sufficient empty space and place the doc there. cShelf = None for sShelfID in self.lShelfIDs: cShelf = G.dID2Shelf[sShelfID] bResult = cShelf.mAcceptDocument(mysDocID, nSize, mysClientID) if bResult: break # True = doc has been stored else: continue # False = no, try another shelf, if any else: # If no more shelves, create another and use it. sNewShelfID = self.mCreateShelf() self.lShelfIDs.append(sNewShelfID) cShelf = G.dID2Shelf[sNewShelfID] sShelfID = cShelf.ID # TODO: #Why not just use sNewShelfID? result = cShelf.mAcceptDocument(mysDocID, nSize, mysClientID) # Record that the doc has been stored on this server. self.lDocIDsComplete.append(mysDocID) self.bInUse = True self.lDocIDs.append(mysDocID) self.dDocIDs[mysDocID] = mysClientID NTRC.tracef( 3, "SERV", "proc mAddDocument serv|%s| id|%s| " "docid|%s| size|%s| assigned to shelfid|%s| remaining|%s|" % (self.sName, self.ID, mysDocID, cDoc.nSize, sShelfID, cShelf.nFreeSpace)) return self.ID + "+" + sShelfID + "+" + mysDocID else: NTRC.ntracef( 3, "SERV", "proc mAddDocument1 dead server|%s| do not " "add doc|%s| for client|%s|" % (self.ID, mysDocID, mysClientID)) return False
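# Note on mAddDocument's loop: it leans on Python's for/else, where the
#  else clause runs only if the loop ends without break, i.e., when no
#  existing shelf accepted the document.  Minimal demonstration:
for n in [1, 2, 3]:
    if n > 9:
        break
else:
    print("no break occurred")      # prints: the loop ran dry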
def fntDoOneLine(mysLine, mynProc, mynLine):
    """Execute one single-line command.

    Input: single line of command.
    Output: tuple of (the Popen PIPE return code, command return code,
     list of output lines as strings).

    Contributes line(s) to be in log file.
    Input lines and the first line of output blocks have timestamps;
    other lines in output blocks are indented with spaces.
    """
    sTimeBegin = fnsGetTimestamp()
    proc = (subprocess.Popen(mysLine
        , stdout=subprocess.PIPE
        , close_fds=True            # The default anyway, I think.
        , stderr=subprocess.DEVNULL
        , universal_newlines=True
        , shell=True)
        )
    (sProcOut, sProcErr) = proc.communicate()
    proc.stdout.close()
    if not sProcErr:
        sProcErr = ""
    sTimeEnd = fnsGetTimestamp()
    # Format lines for output by timestamping or indenting each line.
    sOut = ("-"*len(sTimeBegin) + "\n"
            + sTimeBegin + "  " + "$ " + mysLine + "\n")
    lTmpOut1 = sProcOut.rstrip().split("\n")
    lTmpOut2 = [fnsStampLine(sTimeEnd, sLine, (i == 0))
                for i, sLine in enumerate(lTmpOut1)]
    sOut += "\n".join(lTmpOut2)
    sOut += sProcErr.rstrip()
    # Collect and return everything to caller.
    nCmdStat = "n/a - RBL"
    nReturnCode = proc.returncode
    lOut = sOut.split("\n")
    NTRC.ntracef(4, "DO1L", "proc DoOneLine case|%s| line|%s| "
        "sline|%s| lResult|%s|"
        % (mynProc, mynLine, mysLine, lOut))
    return(tLineOut(callstatus=nReturnCode, cmdstatus=nCmdStat
        , linenr=mynLine, casenr=mynProc, ltext=lOut))
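# The core Popen recipe above, stripped of the timestamping (assumes a
#  POSIX shell for the example command):
import subprocess

proc = subprocess.Popen("echo hello"
    , stdout=subprocess.PIPE
    , stderr=subprocess.DEVNULL
    , universal_newlines=True
    , shell=True)
(sOut, _) = proc.communicate()
# proc.returncode -> 0, sOut -> 'hello\n'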
def mReduceSomeServerLifetimes(self, mynSpan, mynImpact):
    '''
    Find a shockspan-wide subset of servers and reduce their
     expected lifetimes by the stated reduction percentage.
    '''
    lServersToShock = server.CServer.fnlSelectServerVictims(mynSpan)
    fReduction = mynImpact * 1.0 / 100.0
    NTRC.ntracef(3, "SHOK", "proc reduce servers|%s| by|%s|"
        % (lServersToShock, fReduction))
    for sServerID in lServersToShock:
        lg.logInfo("SHOCK ", "t|%6.0f| reduce svr|%s| life by pct|%s|"
            % (G.env.now, sServerID, mynImpact))
        cServer = G.dID2Server[sServerID]
        fOriginalLife = float(cServer.mfGetMyOriginalLife())
        if fOriginalLife > 0:
            self.mReduceSingleServerLifetime(sServerID, fReduction)
            self.lsServersShocked.append(sServerID)
        else:
            lg.logInfo("SHOCK ",
                "t|%6.0f| cannot reduce svr|%s| life|%.0f|"
                % (G.env.now, sServerID, fOriginalLife))
def fnvReportCaseInstructions(mytInstr):
    '''Print the details for the case.'''
    (runid, dInstr) = (mytInstr.runid, mytInstr.casedict)
    lCmds = mytInstr.cmdlist
    (sLogDir, sLogName) = (mytInstr.logdir, mytInstr.logname)
    (nCopies, nLifem) = (dInstr["nCopies"], dInstr["nLifem"])
    (nAuditFreq, nAuditSegments) = (dInstr["nAuditFreq"]
                    , dInstr["nAuditSegments"])
    (nShockFreq, nShockImpact, nShockSpan) = (dInstr["nShockFreq"]
                    , dInstr["nShockImpact"]
                    , dInstr["nShockSpan"])
    NTRC.ntracef(0, "STRT", "proc main commands run|%s| "
        "ncopies|%s| lifem|%s| audit|%s|seg|%s|"
        "\n1-|%s|\n2-dir|%s| log|%s|"
        % (runid, nCopies, nLifem, nAuditFreq, nAuditSegments,
        lCmds, sLogDir, sLogName))
    return
def _wait(self):
    """
    Calls a callback function after time has elapsed.
    Also handles the Interrupt exception, which invokes an interrupt
     notification function in the same way.
    Note that the yield here doesn't cause its caller to wait,
     but only delays calling the callback routine.
    """
    try:
        yield self.env.timeout(self._delay)
        if self.callbackfn:
            self.callbackfn(self, self._context)
        self.running = False
    except simpy.Interrupt as i:
        NTRC.ntracef(3, "RTIM", "Interrupted %s at %s!"
            % (self, self.env.now))
        if self.interruptfn:
            self.interruptfn(self, self._context)
        self.canceled = True
        self.running = False
    NTRC.ntracef(3, "RTIM", "proc exit _wait action|%s| running|%s| "
        "canceled|%s| t=%s"
        % (self.action, self.running, self.canceled, self.env.now))
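# A minimal standalone SimPy version of the same pattern (names
#  invented): a process waits out a delay, fires a callback, and
#  treats Interrupt as cancellation.
import simpy

def wait_then_call(env, fDelay, fnCallback):
    try:
        yield env.timeout(fDelay)
        fnCallback(env.now)
    except simpy.Interrupt:
        pass                        # canceled before the delay elapsed

env = simpy.Environment()
env.process(wait_then_call(env, 5, lambda t: print("fired at", t)))
env.run()                           # prints 'fired at 5'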
def fnbWaitForOpening(gl, mynWaitTimeMsec, mynWaitMax): '''How many active jobs? If maxed out, wait for an empty slot and try again. ''' nWait = mynWaitMax while nWait: nAlive = fnnHowManyAlive(gl) if nAlive < gl.nParallel: break else: nWait -= 1 gl.nWaitedForSlot += 1 if gl.bDebugPrint: print(".", end='') # DEBUG time.sleep(mynWaitTimeMsec / 1000.0) NTRC.ntracef(5, "WAIT", "proc waitforopening timesleft|%s| " "nwaited|%s|" % (nWait, gl.nWaitedForSlot)) # Have we waited too long for an opening? if nWait <= 0: raise ValueError("Waited too long for empty job slot.") else: return (nWait > 0)
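# Generic shape of the wait-with-budget loop above (a sketch; names
#  invented): poll a readiness predicate up to nTries times, then
#  give up loudly.
import time

def wait_until(fnbReady, nTries, fSleepSec):
    for _ in range(nTries):
        if fnbReady():
            return True
        time.sleep(fSleepSec)
    raise ValueError("Waited too long for empty job slot.")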
def fdGetParams(mysFile, mylGuide):
    '''
    fdGetParams()
    Return a dictionary of entries from a CSV file according to
     the specified format.  Generally, the dict has a string or
     int key and returns a list.  The list may contain more lists.
    Remove blank lines and comment lines (#).
    Integers in this case drive me nucking futs.  Everything from
     a CSV file is a string, but some of the dict keys returned and
     many of the dict values returned are to be used as ints.
     One must carefully convert anything that might look like
     an int to a real one.
    '''
    dParams = dict()
    # If there is no file of the right name, then return None.
    try:
        fh = open(mysFile, "r")
        fh.close()
    except (ValueError, IOError):
        NTRC.ntracef(3, "READ", "proc fdGetParams1 file not found |%s|"
            % (mysFile))
        dParams = None
    # If there is such a file, then parse it and return its dictionary.
    (sKey, lCols) = mylGuide
    if dParams is not None:
        # Text mode, not binary: the regexes and the csv module below
        #  want strings, not bytes.
        with open(mysFile, "r") as fhInfile:
            lLines = fhInfile.readlines()
        # Remove comments and blank lines.
        for sLine in lLines[:]:
            if (re.match("^ *#", sLine)
                    or re.match("^ *$", sLine.rstrip())):
                lLines.remove(sLine)
                NTRC.ntracef(3, "READ", "proc fdGetParams3 remove "
                    "comment or blank line |%s|" % (sLine.strip()))
        # Now get the CSV args into a list of dictionaries.
        lRowDicts = csv.DictReader(lLines)
        for dRow in lRowDicts:
            dNewRow = dict()
            # Sanitize (i.e., re-integerize) the entire row dict,
            #  keys and values, and use the new version.
            for xKey in dRow:
                dNewRow[fnIntPlease(xKey)] = fnIntPlease(dRow[xKey])
            intKey = dNewRow[sKey]
            if intKey not in dParams:
                dParams[intKey] = []
            lVal = list()
            for sCol in lCols:
                # Many of the values might be ints.
                lVal.append(dNewRow[sCol])
            dParams[intKey].append(lVal)
            NTRC.ntracef(5, "READ", "proc fdGetParams2 mylGuide|%s| "
                "dRow|%s| intKey|%s| lVal|%s| dParams|%s|"
                % (mylGuide, dRow, intKey, lVal, dParams))
    return dParams
def fntDoOneCase(mytInstruction, qToUse): """Input: list of instructions generated by the broker for this case; multiprocessing queue through which to report results. Remove blanks, comments, etc., from the instructions. Each line that is not blank or comment is a command to be executed. Blanks and comments are written directly into the output. Output: list of commands and their output, sent to the supplied queue. The text will also be written to a log file for the case. This function will be a multiprocessing external process. """ sWhoami = multiprocessing.current_process().name NTRC.ntracef(3, "DO1", "proc procname|%s|" % (sWhoami)) nProc = fnsGetProcessNumber(sWhoami) lResults = [] # list of strings # Unpack instruction command list and other items. lInstruction = mytInstruction.cmdlist (sLogfileDir, sLogfileName) = (mytInstruction.logdir , mytInstruction.logname) # Process all command lines of the instruction list and collect results. for nLine, sLine in enumerate(lInstruction): if fnbDoNotIgnoreLine(sLine): # Genuine line; execute and collect answer line(s). tAnswer = fntDoOneLine(sLine, nProc, nLine) (nRtn, nErr, lResult) = (tAnswer.callstatus , tAnswer.cmdstatus , tAnswer.ltext) lResults.extend(lResult) NTRC.ntracef(4, "DO1", "proc DoOneCase case|%s| line|%s| " "lResult|%s|" % (nProc, nLine, lResult)) else: # Comment or blank line; just append to results. lResults.extend([("-"*len(fnsGetTimestamp())) , (fnsGetTimestamp() + " " + sLine)]) NTRC.ntracef(4, "DO1", "proc DoOneCase case|%s| line|%s| " "comment|%s|" % (nProc, nLine, sLine)) fnWriteLogFile(nProc, (lResults), sLogfileDir, sLogfileName) lPrefix = [("BEGIN results from " + sWhoami)] lSuffix = [("ENDOF results from " + sWhoami)] lResultsToSee = ['\n'] + lPrefix + lResults + lSuffix + ['\n'] tAnswers = tLinesOut(procname=sWhoami, listoflists=lResultsToSee) qToUse.put(tAnswers) qToUse.close() return (tAnswers)
def mPlaceCollection(self, mysCollID):
    '''\
    Get list of servers available at the right quality level.
    Select the policy-specified number of them.
    Send the collection to each server in turn.
    '''
    cColl = G.dID2Collection[mysCollID]
    nCollValue = cColl.nValue   # Yes, bad, reaching into the
                                #  collection's knickers.
    lServersForCollection = self.mSelectServersForCollection(nCollValue)
    # If there aren't enough servers at this level for the initial
    #  placement, stop right here.
    if not lServersForCollection:
        raise IndexError(
            'BUGCHECK not enough servers for initial collection placement.')
    # The distribution params have already limited the
    #  set of servers in the select-for-collection routine.
    self.lServersToUse = lServersForCollection
    NTRC.ntracef(3, "CLI", "proc mPlaceCollection1 client|%s| "
        "place coll|%s| to|%d|servers"
        % (self.ID, mysCollID, len(self.lServersToUse)))
    # Distribute collection to a set of servers.
    for sServerID in self.lServersToUse:
        NTRC.ntracef(3, "CLI", "proc mPlaceCollection2 client|%s| "
            "send coll|%s| to server|%s|"
            % (self.ID, mysCollID, sServerID))
        NTRC.ntracef(3, "SHOW", "proc mPlaceCollection2 client|%s| "
            "send coll|%s| to server|%s|"
            % (self.ID, mysCollID, sServerID))
        # Send copy of collection to server.
        nDocs = self.mPlaceCollectionOnServer(mysCollID, sServerID)
    # Initialize the auditing process for this collection.
    if G.nAuditCycleInterval > 0:
        self.cAudit = fAudit_Select(G.sAuditStrategy, self.ID,
            mysCollID, G.nAuditCycleInterval)
    return self.lServersToUse
def fntProcessOneInstruction(mysRunNumber, mydInstruction, mynSeed):
    '''
    Process one single instruction for one run.
    If just testing today, print instruction contents but do not run it.
    If the instruction has already been processed, skip over it unless
     the user requires it to be redone.
    '''
    sInstructionId = str(mydInstruction["_id"])
    tThisInst = None        # Stays None for skipped or list-only items.
    # If the user specifies, redo this case even if it was done before.
    if g.sRedo.startswith("Y"):
        NTRC.ntracef(0, "MAIN", "proc force redo for item id|%s|"
            % (sInstructionId))
        # Delete the done record for this run, if there is one,
        #  to make it appear that the run is new this time.
        g.mdb.fndDeleteDoneRecord(sInstructionId)
    # If this instruction has already been processed, skip it.
    bIsItDone = g.mdb.fnbIsItDone(sInstructionId)
    if bIsItDone:
        NTRC.ntracef(0, "MAIN", "proc skip item already done run|%s| "
            "id|%s| copies|%s| lifem|%s|"
            % (mysRunNumber, sInstructionId, mydInstruction["nCopies"],
            mydInstruction["nLifem"]))
    # Testing: Just dump out the instruction dictionary for this item.
    elif g.sListOnly.startswith("Y"):
        NTRC.ntracef(0, "MAIN", "proc ListOnly, item run|%s| "
            "ncopies|%s| lifem|%s| id|%s| dict|%s|"
            % (mysRunNumber, mydInstruction["nCopies"],
            mydInstruction["nLifem"], sInstructionId,
            list(util.fngSortDictItemsByKeys(mydInstruction))))
    # Real life: execute the instruction.  Well, put it in the list, anyway.
    else:
        mydInstruction["nRandomSeed"] = mynSeed
        # Format commands to be executed by actor.
        g.sShelfLogFileName = g.cFmt.msGentlyFormat(
            g.sShelfLogFileTemplate, mydInstruction, g, CG)
        g.lCommands = []
        for sTemplate in g.lTemplates:
            sCmd = g.cFmt.msGentlyFormat(sTemplate, mydInstruction, g, CG)
            g.lCommands.append(sCmd)
        # Make instruction file for the actor.
        # BZZZT: This is probably irrelevant in newbroker.
        g.sActorCmdFileName = g.cFmt.msGentlyFormat(
            g.sActorCmdFileTemplate, mydInstruction, g, CG)
        g.sActorCommand = g.cFmt.msGentlyFormat(
            g.sActorCmdTemplate, mydInstruction, g, CG)
        g.sActorLogDir = g.cFmt.msGentlyFormat(
            g.sActorLogDirTemplate, mydInstruction, g, CG)
        NTRC.ntracef(0, "MAIN", "proc main commands run|%s| "
            "ncopies|%s| lifem|%s| audit|%s| "
            "segs|%s|\n1-|%s|\n2-dir|%s| log|%s|"
            % (mysRunNumber, mydInstruction["nCopies"],
            mydInstruction["nLifem"], mydInstruction["nAuditFreq"],
            mydInstruction["nAuditSegments"], g.lCommands,
            g.sActorLogDir, g.sShelfLogFileName))
        # Create file for actor, maybe just comments.
        with open(g.sActorCmdFileName, 'w') as fhActorCmdFile:
            fhActorCmdFile.write(
                "# ListActor command file, "
                "automatically generated by broker.  "
                "Do not edit.\n")
            for sCommand in g.lCommands:
                sLine = g.cFmt.fnsMaybeTest(sCommand, g)
                print(sLine, file=fhActorCmdFile)
        # Record that this job will soon be running.
        mydInstruction["starttime"] = util.fnsGetTimeStamp()
        g.mdb.fndInsertProgressRecord(mydInstruction["_id"], mydInstruction)
        # Build the full instruction for the caller.
        tThisInst = tInstruction(casedict=mydInstruction
                        , cmdlist=g.lCommands
                        , logname=g.sShelfLogFileName + "_case.log"
                        , logdir=g.sActorLogDir
                        , runid=mysRunNumber
                        )
    return tThisInst