def _sortChunk(records, key, chunkIndex, fields):
  """Sort an in-memory chunk of records

  records - a list of records read from the original dataset
  key - a list of field indices to sort the records by
  chunkIndex - the index of the current chunk
  fields - the field descriptors for the chunk file

  The records contain only the fields requested by the user.

  _sortChunk() will write the sorted records to a file named
  "chunk_<chunk index>.csv" (chunk_0.csv, chunk_1.csv, ...).
  """
  title(additional='(key=%s, chunkIndex=%d)' % (str(key), chunkIndex))

  assert len(records) > 0

  # Sort the current records in place
  records.sort(key=itemgetter(*key))

  # Write the sorted records to a chunk file
  if chunkIndex is not None:
    filename = 'chunk_%d.csv' % chunkIndex
    with FileRecordStream(filename, write=True, fields=fields) as o:
      for r in records:
        o.appendRecord(r)

    assert os.path.getsize(filename) > 0

  return records
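# For intuition about the `key` argument: it is a list of field indices, and
# _sortChunk() sorts with operator.itemgetter(*key), so earlier indices take
# precedence. The function below is an illustrative sketch only; the name
# `_exampleKeySort` and the sample records are not part of the module.
def _exampleKeySort():
  from operator import itemgetter

  records = [('b', 2, 'x'), ('a', 2, 'y'), ('c', 1, 'z')]
  key = [1, 0]  # sort by field 1 first, then field 0 as a tie-breaker
  records.sort(key=itemgetter(*key))
  assert records == [('c', 1, 'z'), ('a', 2, 'y'), ('b', 2, 'x')]
  return records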
def _step(self):
  """Run the network for one iteration."""
  title()

  self.runCount = 1
  self.experiment.pause = False
  self._runExperiment()
  self.pause = True
def onPhaseTeardown(self, exp):
  """Mark the run as done after the last phase in the workflow."""
  title()
  index = exp.position.phase

  # Last phase
  if index == len(exp.workflow) - 1:
    self.done = True
def onIter(self, exp, i):
  """Called once per iteration; pauses the experiment when requested."""
  title(additional='(), self.pause = ' + str(self.pause))

  self.iteration += 1

  # Check if the pause button was clicked
  if self.pause:
    exp.pause = True
  elif self.runCount is not None:
    # Count down a bounded run and pause when it reaches zero
    self.runCount -= 1
    if self.runCount == 0:
      exp.pause = True

  runtimelistener.listenersEnabled = exp.pause
def onPhaseSetup(self, exp):
  """Reset iteration state for the new phase and pause if stepping."""
  title()

  self.iteration = 0
  self.phase = self._getPhase(exp)
  phase = self.phase[1]
  self.iterationCount = phase[0]['iterationCount'] if len(phase) > 0 else 0

  if self.pauseAtNextStep and self.pauseAtPhaseSetup:
    exp.pause = True
    self.pause = True
    self.pauseAtPhaseSetup = False
  else:
    self.pauseAtPhaseSetup = True
def _mergeFiles(key, chunkCount, outputFile, fields):
  """Merge sorted chunk files into a sorted output file

  key - a list of field indices the chunks are sorted by
  chunkCount - the number of available chunk files
  outputFile - the name of the sorted output file
  fields - the field descriptors for the output file
  """
  title()

  # Open all chunk files
  files = [FileRecordStream('chunk_%d.csv' % i) for i in range(chunkCount)]
  # Keep the original handles so every chunk file can be closed and removed,
  # even after exhausted files are filtered out of the working list below
  allFiles = list(files)

  # Open the output file
  with FileRecordStream(outputFile, write=True, fields=fields) as o:
    records = [f.getNextRecord() for f in files]

    # This loop will run until all files are exhausted
    while not all(r is None for r in records):
      # Drop exhausted files (None records) from the working lists
      indices = [i for i, r in enumerate(records) if r is not None]
      records = [records[i] for i in indices]
      files = [files[i] for i in indices]

      # Find the smallest current record across the open chunks
      r = min(records, key=itemgetter(*key))

      # Write it to the output file
      o.appendRecord(r)

      # Find the index of the file that produced the current record
      index = records.index(r)

      # Read a new record from that file
      records[index] = files[index].getNextRecord()

  # Cleanup chunk files
  for i, f in enumerate(allFiles):
    f.close()
    os.remove('chunk_%d.csv' % i)
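# Sketch of how _sortChunk() and _mergeFiles() could be composed into an
# external sort of a dataset too large to fit in memory. This is illustrative
# only: the name `sortDataset`, the `chunkSize` limit, and the way `fields`
# are obtained from the reader are assumptions, not part of the original code.
def sortDataset(inputFile, outputFile, key, chunkSize=100000):
  with FileRecordStream(inputFile) as reader:
    fields = reader.getFields()
    chunkCount = 0
    records = []
    while True:
      r = reader.getNextRecord()
      if r is not None:
        records.append(r)
      # Flush a full chunk, or the final partial chunk at end of input
      if len(records) == chunkSize or (r is None and records):
        _sortChunk(records, key, chunkCount, fields)
        chunkCount += 1
        records = []
      if r is None:
        break

  # Merge the sorted chunk files into the final sorted output
  _mergeFiles(key, chunkCount, outputFile, fields)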