class _BasicPredictionWriter(PredictionWriterIface):
    """Basic (file-based) implementation of PredictionWriterIface.

    Instances are returned by BasicPredictionWriterFactory. The output
    dataset (a FileRecordStream named "<label>.<inferenceType>.predictionLog.csv")
    is created lazily by the first append() call that carries at least one
    non-None inference.
    """

    def __init__(self, experimentDir, label, inferenceType, fields,
                 metricNames=None, checkpointSource=None):
        """Constructor.

        experimentDir: experiment directory path that contains description.py
        label: A label string to incorporate into the filename.
        inferenceType: A constant from opfutils.InferenceType for the
            requested prediction writer
        fields: a sequence of nupic.data.fieldmeta.FieldMetaInfo representing
            fields that will be emitted to this prediction writer (a deep copy
            is stored)
        metricNames: OPTIONAL - A list of metric names that will be emitted by
            this prediction writer. The argument is not modified; a sorted
            copy is stored.
        checkpointSource: If not None, a File-like object containing the
            previously-checkpointed predictions for setting the initial
            contents of this PredictionOutputStream. Its contents are copied
            into an in-memory buffer before returning.
        """
        self.__experimentDir = experimentDir

        # opfutils.InferenceType kind value
        self.__inferenceType = inferenceType

        # A tuple of nupic.data.fieldmeta.FieldMetaInfo
        self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
        self.__numInputFields = len(self.__inputFieldsMeta)

        self.__label = label

        # Keep a sorted private copy instead of sorting the caller's list in
        # place, and fall back to an empty list so that append() can always
        # iterate over the metric names.
        if metricNames is not None:
            self.__metricNames = sorted(metricNames)
        else:
            self.__metricNames = []

        # Define our output field meta info
        self.__outputFieldsMeta = []

        # The list of inputs that we include in the prediction output
        self._rawInputNames = []

        # Output dataset
        self.__datasetPath = None
        self.__dataset = None

        # Save checkpoint data until we're ready to create the output dataset
        self.__checkpointCache = None
        if checkpointSource is not None:
            checkpointSource.seek(0)
            self.__checkpointCache = StringIO.StringIO()
            shutil.copyfileobj(checkpointSource, self.__checkpointCache)

    ############################################################################
    def __openDatafile(self, modelResult):
        """Open the data file and write the header row.

        The column layout is derived from modelResult: the reset bit, the raw
        inputs (sorted by name, skipping names that start with '_' and the
        'reset' field), the columns of every inference element and finally the
        logged metrics. Rows cached from a checkpoint, if any, are then copied
        into the freshly created dataset and the cache is disposed of.

        modelResult: An opfutils.ModelResult object whose rawInput and
            inferences determine the output columns.
        """
        # Start from a clean slate: append() re-enters this method whenever
        # the dataset is not open (for example after close()), and leftover
        # entries would duplicate every column.
        self.__outputFieldsMeta = []
        self._rawInputNames = []

        # Write reset bit
        self.__outputFieldsMeta.append(
            FieldMetaInfo(name="reset",
                          type=FieldMetaType.integer,
                          special=FieldMetaSpecial.reset))

        # -----------------------------------------------------------------------
        # Write each of the raw inputs that go into the encoders
        for field in sorted(modelResult.rawInput.keys()):
            if field.startswith('_') or field == 'reset':
                continue
            self.__outputFieldsMeta.append(
                FieldMetaInfo(name=field,
                              type=FieldMetaType.string,
                              special=FieldMetaSpecial.none))
            self._rawInputNames.append(field)

        # -----------------------------------------------------------------------
        # Handle each of the inference elements
        for inferenceElement, value in modelResult.inferences.iteritems():
            inferenceLabel = InferenceElement.getLabel(inferenceElement)

            # TODO: Right now we assume list inferences are associated with
            # the input field metadata
            if type(value) in (list, tuple):
                # Append input and prediction field meta-info
                self.__outputFieldsMeta.extend(
                    self.__getListMetaInfo(inferenceElement))
            elif isinstance(value, dict):
                self.__outputFieldsMeta.extend(
                    self.__getDictMetaInfo(inferenceElement, value))
            else:
                if InferenceElement.getInputElement(inferenceElement):
                    self.__outputFieldsMeta.append(
                        FieldMetaInfo(name=inferenceLabel + ".actual",
                                      type=FieldMetaType.string,
                                      special=''))
                self.__outputFieldsMeta.append(
                    FieldMetaInfo(name=inferenceLabel,
                                  type=FieldMetaType.string,
                                  special=''))

        # One float column per logged metric
        for metricName in self.__metricNames:
            self.__outputFieldsMeta.append(
                FieldMetaInfo(name=metricName,
                              type=FieldMetaType.float,
                              special=FieldMetaSpecial.none))

        # Create the inference directory for our experiment
        inferenceDir = _FileUtils.createExperimentInferenceDir(
            self.__experimentDir)

        # Construct the prediction dataset file path
        filename = (self.__label + "."
                    + opfutils.InferenceType.getLabel(self.__inferenceType)
                    + ".predictionLog.csv")
        self.__datasetPath = os.path.join(inferenceDir, filename)

        # Create the output dataset
        print("OPENING OUTPUT FOR PREDICTION WRITER AT: %r"
              % self.__datasetPath)
        print("Prediction field-meta: %r" % (
            [tuple(i) for i in self.__outputFieldsMeta],))
        self.__dataset = FileRecordStream(streamID=self.__datasetPath,
                                          write=True,
                                          fields=self.__outputFieldsMeta)

        # Copy data from checkpoint cache
        if self.__checkpointCache is not None:
            self.__checkpointCache.seek(0)
            reader = csv.reader(self.__checkpointCache, dialect='excel')

            # Consume the header row and check it against the new dataset
            try:
                header = next(reader)
            except StopIteration:
                print("Empty record checkpoint initializer for %r" % (
                    self.__datasetPath,))
            else:
                fieldNames = tuple(self.__dataset.getFieldNames())
                assert fieldNames == tuple(header), \
                    "dataset.getFieldNames(): %r; predictionCheckpointFieldNames: %r" % (
                        fieldNames, tuple(header))

            # Copy the remaining rows from checkpoint
            numRowsCopied = 0
            for row in reader:
                self.__dataset.appendRecord(row)
                numRowsCopied += 1

            self.__dataset.flush()

            print("Restored %d rows from checkpoint for %r" % (
                numRowsCopied, self.__datasetPath))

            # Dispose of our checkpoint cache
            self.__checkpointCache.close()
            self.__checkpointCache = None

    ############################################################################
    def setLoggedMetrics(self, metricNames):
        """Tell the writer which metrics should be written.

        Parameters:
        -----------------------------------------------------------------------
        metricNames: A list of metric labels to be written; None selects no
            metrics. The names are stored as a set.
        """
        if metricNames is None:
            self.__metricNames = set()
        else:
            self.__metricNames = set(metricNames)

    ############################################################################
    def close(self):
        """[virtual method override] Closes the writer (e.g., close the
        underlying file).
        """
        # Compare against None, as the rest of the class does, so an open
        # dataset is closed regardless of how it evaluates in a boolean test.
        if self.__dataset is not None:
            self.__dataset.close()
        self.__dataset = None

    ############################################################################
    def __getListMetaInfo(self, inferenceElement):
        """Get field metadata information for inferences that are of list type.

        For every input field this yields an optional "<field>.actual" column
        (only when the inference element has an input element) followed by a
        "<field>.<inferenceLabel>" column.

        TODO: Right now we assume list inferences are associated with the
        input field metadata
        """
        inferenceLabel = InferenceElement.getLabel(inferenceElement)
        # The answer does not depend on the field, so ask only once
        hasInputElement = InferenceElement.getInputElement(inferenceElement)

        fieldMetaInfo = []
        for inputFieldMeta in self.__inputFieldsMeta:
            if hasInputElement:
                fieldMetaInfo.append(
                    FieldMetaInfo(name=inputFieldMeta.name + ".actual",
                                  type=inputFieldMeta.type,
                                  special=inputFieldMeta.special))
            fieldMetaInfo.append(
                FieldMetaInfo(name=inputFieldMeta.name + "." + inferenceLabel,
                              type=inputFieldMeta.type,
                              special=inputFieldMeta.special))
        return fieldMetaInfo

    ############################################################################
    def __getDictMetaInfo(self, inferenceElement, inferenceDict):
        """Get field metadata information for inferences that are of dict type.

        Yields an optional "<inferenceLabel>.actual" column (only when the
        inference element has an input element) followed by one
        "<inferenceLabel>.<key>" column per key of inferenceDict, in sorted
        key order.
        """
        fieldMetaInfo = []
        inferenceLabel = InferenceElement.getLabel(inferenceElement)

        if InferenceElement.getInputElement(inferenceElement):
            fieldMetaInfo.append(
                FieldMetaInfo(name=inferenceLabel + ".actual",
                              type=FieldMetaType.string,
                              special=''))

        for key in sorted(inferenceDict.keys()):
            fieldMetaInfo.append(
                FieldMetaInfo(name=inferenceLabel + "." + str(key),
                              type=FieldMetaType.string,
                              special=''))
        return fieldMetaInfo

    ############################################################################
    def append(self, modelResult):
        """[virtual method override] Emits a single prediction as input versus
        predicted.

        Nothing is written when modelResult carries no inferences or only
        None inferences. The output dataset is opened on first use.

        modelResult: An opfutils.ModelResult object that contains the model
            input and output for the current timestep.
        """
        # If there are no inferences, don't write anything
        inferences = modelResult.inferences
        if inferences is None or all(
                value is None for value in inferences.itervalues()):
            return

        if self.__dataset is None:
            self.__openDatafile(modelResult)

        inputData = modelResult.sensorInput

        outputRow = [int(bool(inputData.sequenceReset))]

        # -----------------------------------------------------------------------
        # Write out the raw inputs
        rawInput = modelResult.rawInput
        outputRow.extend(str(rawInput[field]) for field in self._rawInputNames)

        # -----------------------------------------------------------------------
        # Write out the inference element info
        for inferenceElement, outputVal in inferences.iteritems():
            inputElement = InferenceElement.getInputElement(inferenceElement)
            if inputElement:
                inputVal = getattr(inputData, inputElement)
            else:
                inputVal = None

            if type(outputVal) in (list, tuple):
                if inputElement:
                    assert type(inputVal) in (list, tuple), \
                        "Expected list or tuple input for %r, got %r" % (
                            inputElement, type(inputVal))
                    for iv, ov in zip(inputVal, outputVal):
                        # Write actual
                        outputRow.append(str(iv))
                        # Write inferred
                        outputRow.append(str(ov))
                else:
                    # Without an input element the header holds prediction
                    # columns only (see __getListMetaInfo), so there are no
                    # actual values to interleave.
                    outputRow.extend(str(ov) for ov in outputVal)
            elif isinstance(outputVal, dict):
                if inputVal is not None:
                    # If we have a predicted field, include only that in the
                    # actuals
                    if modelResult.predictedFieldIdx is not None:
                        outputRow.append(
                            str(inputVal[modelResult.predictedFieldIdx]))
                    else:
                        outputRow.append(str(inputVal))
                for key in sorted(outputVal.keys()):
                    outputRow.append(str(outputVal[key]))
            else:
                if inputVal is not None:
                    outputRow.append(str(inputVal))
                outputRow.append(str(outputVal))

        # Tolerate a result without metrics; a missing metric is logged as 0.0
        metrics = modelResult.metrics or {}
        for metricName in self.__metricNames:
            outputRow.append(metrics.get(metricName, 0.0))

        self.__dataset.appendRecord(outputRow)
        self.__dataset.flush()

    def checkpoint(self, checkpointSink, maxRows):
        """[virtual method override] Save a checkpoint of the prediction output
        stream. The checkpoint comprises up to maxRows of the most recent
        inference records.

        When the output dataset has not been opened yet, the cached contents
        of the checkpoint source given to the constructor (if any) are copied
        through unchanged.

        Parameters:
        ----------------------------------------------------------------------
        checkpointSink: A File-like object where predictions checkpoint data,
            if any, will be stored.
        maxRows: Maximum number of most recent inference rows to checkpoint.
        """
        checkpointSink.truncate()

        if self.__dataset is None:
            if self.__checkpointCache is not None:
                self.__checkpointCache.seek(0)
                shutil.copyfileobj(self.__checkpointCache, checkpointSink)
                checkpointSink.flush()
            # Otherwise there is nothing to checkpoint
            return

        self.__dataset.flush()
        totalDataRows = self.__dataset.getDataRowCount()

        if totalDataRows == 0:
            # Nothing to checkpoint
            return

        # Open reader of prediction file (suppress missingValues conversion)
        reader = FileRecordStream(self.__datasetPath, missingValues=[])
        try:
            # Create CSV writer for writing checkpoint rows
            writer = csv.writer(checkpointSink)

            # Write the header row to checkpoint sink -- just field names
            writer.writerow(reader.getFieldNames())

            # Determine number of rows to checkpoint
            numToWrite = min(maxRows, totalDataRows)

            # Skip initial rows to get to the rows that we actually need to
            # checkpoint
            numRowsToSkip = totalDataRows - numToWrite
            for _ in xrange(numRowsToSkip):
                reader.next()

            # Write the data rows to checkpoint sink
            numWritten = 0
            while True:
                row = reader.getNextRecord()
                if row is None:
                    break
                writer.writerow([str(element) for element in row])
                numWritten += 1
        finally:
            # Release the read handle even if copying fails part-way
            reader.close()

        assert numWritten == numToWrite, \
            "numWritten (%s) != numToWrite (%s)" % (numWritten, numToWrite)

        checkpointSink.flush()
class _BasicPredictionWriter(PredictionWriterIface):
    """Basic (file-based) implementation of PredictionWriterIface.

    Instances are returned by BasicPredictionWriterFactory. The output
    dataset (a FileRecordStream named "<label>.<inferenceType>.predictionLog.csv")
    is created lazily by the first append() call that carries at least one
    non-None inference.
    """

    def __init__(self, experimentDir, label, inferenceType, fields,
                 metricNames=None, checkpointSource=None):
        """Constructor.

        experimentDir: experiment directory path that contains description.py
        label: A label string to incorporate into the filename.
        inferenceType: A constant from opfutils.InferenceType for the
            requested prediction writer
        fields: a sequence of nupic.data.fieldmeta.FieldMetaInfo representing
            fields that will be emitted to this prediction writer (a deep copy
            is stored)
        metricNames: OPTIONAL - A list of metric names that will be emitted by
            this prediction writer. The argument is not modified; a sorted
            copy is stored.
        checkpointSource: If not None, a File-like object containing the
            previously-checkpointed predictions for setting the initial
            contents of this PredictionOutputStream. Its contents are copied
            into an in-memory buffer before returning.
        """
        self.__experimentDir = experimentDir

        # opfutils.InferenceType kind value
        self.__inferenceType = inferenceType

        # A tuple of nupic.data.fieldmeta.FieldMetaInfo
        self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
        self.__numInputFields = len(self.__inputFieldsMeta)

        self.__label = label

        # Keep a sorted private copy instead of sorting the caller's list in
        # place, and fall back to an empty list so that append() can always
        # iterate over the metric names.
        if metricNames is not None:
            self.__metricNames = sorted(metricNames)
        else:
            self.__metricNames = []

        # Define our output field meta info
        self.__outputFieldsMeta = []

        # The list of inputs that we include in the prediction output
        self._rawInputNames = []

        # Output dataset
        self.__datasetPath = None
        self.__dataset = None

        # Save checkpoint data until we're ready to create the output dataset
        self.__checkpointCache = None
        if checkpointSource is not None:
            checkpointSource.seek(0)
            self.__checkpointCache = StringIO.StringIO()
            shutil.copyfileobj(checkpointSource, self.__checkpointCache)

    def __openDatafile(self, modelResult):
        """Open the data file and write the header row.

        The column layout is derived from modelResult: the reset bit, the raw
        inputs (sorted by name, skipping names that start with '_' and the
        'reset' field), the columns of every inference element and finally the
        logged metrics. Rows cached from a checkpoint, if any, are then copied
        into the freshly created dataset and the cache is disposed of.

        modelResult: An opfutils.ModelResult object whose rawInput and
            inferences determine the output columns.
        """
        # Start from a clean slate: append() re-enters this method whenever
        # the dataset is not open (for example after close()), and leftover
        # entries would duplicate every column.
        self.__outputFieldsMeta = []
        self._rawInputNames = []

        # Write reset bit
        self.__outputFieldsMeta.append(
            FieldMetaInfo(name="reset",
                          type=FieldMetaType.integer,
                          special=FieldMetaSpecial.reset))

        # -----------------------------------------------------------------------
        # Write each of the raw inputs that go into the encoders
        for field in sorted(modelResult.rawInput.keys()):
            if field.startswith('_') or field == 'reset':
                continue
            self.__outputFieldsMeta.append(
                FieldMetaInfo(name=field,
                              type=FieldMetaType.string,
                              special=FieldMetaSpecial.none))
            self._rawInputNames.append(field)

        # -----------------------------------------------------------------------
        # Handle each of the inference elements
        for inferenceElement, value in modelResult.inferences.iteritems():
            inferenceLabel = InferenceElement.getLabel(inferenceElement)

            # TODO: Right now we assume list inferences are associated with
            # the input field metadata
            if type(value) in (list, tuple):
                # Append input and prediction field meta-info
                self.__outputFieldsMeta.extend(
                    self.__getListMetaInfo(inferenceElement))
            elif isinstance(value, dict):
                self.__outputFieldsMeta.extend(
                    self.__getDictMetaInfo(inferenceElement, value))
            else:
                if InferenceElement.getInputElement(inferenceElement):
                    self.__outputFieldsMeta.append(
                        FieldMetaInfo(name=inferenceLabel + ".actual",
                                      type=FieldMetaType.string,
                                      special=''))
                self.__outputFieldsMeta.append(
                    FieldMetaInfo(name=inferenceLabel,
                                  type=FieldMetaType.string,
                                  special=''))

        # One float column per logged metric
        for metricName in self.__metricNames:
            self.__outputFieldsMeta.append(
                FieldMetaInfo(name=metricName,
                              type=FieldMetaType.float,
                              special=FieldMetaSpecial.none))

        # Create the inference directory for our experiment
        inferenceDir = _FileUtils.createExperimentInferenceDir(
            self.__experimentDir)

        # Construct the prediction dataset file path
        filename = (self.__label + "."
                    + opfutils.InferenceType.getLabel(self.__inferenceType)
                    + ".predictionLog.csv")
        self.__datasetPath = os.path.join(inferenceDir, filename)

        # Create the output dataset
        print("OPENING OUTPUT FOR PREDICTION WRITER AT: {0!r}".format(
            self.__datasetPath))
        print("Prediction field-meta: {0!r}".format(
            [tuple(i) for i in self.__outputFieldsMeta]))
        self.__dataset = FileRecordStream(streamID=self.__datasetPath,
                                          write=True,
                                          fields=self.__outputFieldsMeta)

        # Copy data from checkpoint cache
        if self.__checkpointCache is not None:
            self.__checkpointCache.seek(0)
            reader = csv.reader(self.__checkpointCache, dialect='excel')

            # Consume the header row and check it against the new dataset
            try:
                header = next(reader)
            except StopIteration:
                print("Empty record checkpoint initializer for {0!r}".format(
                    self.__datasetPath))
            else:
                fieldNames = tuple(self.__dataset.getFieldNames())
                assert fieldNames == tuple(header), \
                    "dataset.getFieldNames(): {0!r}; predictionCheckpointFieldNames: {1!r}".format(
                        fieldNames, tuple(header))

            # Copy the remaining rows from checkpoint
            numRowsCopied = 0
            for row in reader:
                self.__dataset.appendRecord(row)
                numRowsCopied += 1

            self.__dataset.flush()

            print("Restored {0:d} rows from checkpoint for {1!r}".format(
                numRowsCopied, self.__datasetPath))

            # Dispose of our checkpoint cache
            self.__checkpointCache.close()
            self.__checkpointCache = None

    def setLoggedMetrics(self, metricNames):
        """Tell the writer which metrics should be written.

        Parameters:
        -----------------------------------------------------------------------
        metricNames: A list of metric labels to be written; None selects no
            metrics. The names are stored as a set.
        """
        if metricNames is None:
            self.__metricNames = set()
        else:
            self.__metricNames = set(metricNames)

    def close(self):
        """[virtual method override] Closes the writer (e.g., close the
        underlying file).
        """
        # Compare against None, as the rest of the class does, so an open
        # dataset is closed regardless of how it evaluates in a boolean test.
        if self.__dataset is not None:
            self.__dataset.close()
        self.__dataset = None

    def __getListMetaInfo(self, inferenceElement):
        """Get field metadata information for inferences that are of list type.

        For every input field this yields an optional "<field>.actual" column
        (only when the inference element has an input element) followed by a
        "<field>.<inferenceLabel>" column.

        TODO: Right now we assume list inferences are associated with the
        input field metadata
        """
        inferenceLabel = InferenceElement.getLabel(inferenceElement)
        # The answer does not depend on the field, so ask only once
        hasInputElement = InferenceElement.getInputElement(inferenceElement)

        fieldMetaInfo = []
        for inputFieldMeta in self.__inputFieldsMeta:
            if hasInputElement:
                fieldMetaInfo.append(
                    FieldMetaInfo(name=inputFieldMeta.name + ".actual",
                                  type=inputFieldMeta.type,
                                  special=inputFieldMeta.special))
            fieldMetaInfo.append(
                FieldMetaInfo(name=inputFieldMeta.name + "." + inferenceLabel,
                              type=inputFieldMeta.type,
                              special=inputFieldMeta.special))
        return fieldMetaInfo

    def __getDictMetaInfo(self, inferenceElement, inferenceDict):
        """Get field metadata information for inferences that are of dict type.

        Yields an optional "<inferenceLabel>.actual" column (only when the
        inference element has an input element) followed by one
        "<inferenceLabel>.<key>" column per key of inferenceDict, in sorted
        key order.
        """
        fieldMetaInfo = []
        inferenceLabel = InferenceElement.getLabel(inferenceElement)

        if InferenceElement.getInputElement(inferenceElement):
            fieldMetaInfo.append(
                FieldMetaInfo(name=inferenceLabel + ".actual",
                              type=FieldMetaType.string,
                              special=''))

        for key in sorted(inferenceDict.keys()):
            fieldMetaInfo.append(
                FieldMetaInfo(name=inferenceLabel + "." + str(key),
                              type=FieldMetaType.string,
                              special=''))
        return fieldMetaInfo

    def append(self, modelResult):
        """[virtual method override] Emits a single prediction as input versus
        predicted.

        Nothing is written when modelResult carries no inferences or only
        None inferences. The output dataset is opened on first use.

        modelResult: An opfutils.ModelResult object that contains the model
            input and output for the current timestep.
        """
        # If there are no inferences, don't write anything
        inferences = modelResult.inferences
        if inferences is None or all(
                value is None for value in inferences.itervalues()):
            return

        if self.__dataset is None:
            self.__openDatafile(modelResult)

        inputData = modelResult.sensorInput

        outputRow = [int(bool(inputData.sequenceReset))]

        # -----------------------------------------------------------------------
        # Write out the raw inputs
        rawInput = modelResult.rawInput
        outputRow.extend(str(rawInput[field]) for field in self._rawInputNames)

        # -----------------------------------------------------------------------
        # Write out the inference element info
        for inferenceElement, outputVal in inferences.iteritems():
            inputElement = InferenceElement.getInputElement(inferenceElement)
            if inputElement:
                inputVal = getattr(inputData, inputElement)
            else:
                inputVal = None

            if type(outputVal) in (list, tuple):
                if inputElement:
                    assert type(inputVal) in (list, tuple), \
                        "Expected list or tuple input for {0!r}, got {1!r}".format(
                            inputElement, type(inputVal))
                    for iv, ov in zip(inputVal, outputVal):
                        # Write actual
                        outputRow.append(str(iv))
                        # Write inferred
                        outputRow.append(str(ov))
                else:
                    # Without an input element the header holds prediction
                    # columns only (see __getListMetaInfo), so there are no
                    # actual values to interleave.
                    outputRow.extend(str(ov) for ov in outputVal)
            elif isinstance(outputVal, dict):
                if inputVal is not None:
                    # If we have a predicted field, include only that in the
                    # actuals
                    if modelResult.predictedFieldName is not None:
                        outputRow.append(
                            str(inputVal[modelResult.predictedFieldName]))
                    else:
                        outputRow.append(str(inputVal))
                for key in sorted(outputVal.keys()):
                    outputRow.append(str(outputVal[key]))
            else:
                if inputVal is not None:
                    outputRow.append(str(inputVal))
                outputRow.append(str(outputVal))

        # Tolerate a result without metrics; a missing metric is logged as 0.0
        metrics = modelResult.metrics or {}
        for metricName in self.__metricNames:
            outputRow.append(metrics.get(metricName, 0.0))

        self.__dataset.appendRecord(outputRow)
        self.__dataset.flush()

    def checkpoint(self, checkpointSink, maxRows):
        """[virtual method override] Save a checkpoint of the prediction output
        stream. The checkpoint comprises up to maxRows of the most recent
        inference records.

        When the output dataset has not been opened yet, the cached contents
        of the checkpoint source given to the constructor (if any) are copied
        through unchanged.

        Parameters:
        ----------------------------------------------------------------------
        checkpointSink: A File-like object where predictions checkpoint data,
            if any, will be stored.
        maxRows: Maximum number of most recent inference rows to checkpoint.
        """
        checkpointSink.truncate()

        if self.__dataset is None:
            if self.__checkpointCache is not None:
                self.__checkpointCache.seek(0)
                shutil.copyfileobj(self.__checkpointCache, checkpointSink)
                checkpointSink.flush()
            # Otherwise there is nothing to checkpoint
            return

        self.__dataset.flush()
        totalDataRows = self.__dataset.getDataRowCount()

        if totalDataRows == 0:
            # Nothing to checkpoint
            return

        # Open reader of prediction file (suppress missingValues conversion)
        reader = FileRecordStream(self.__datasetPath, missingValues=[])
        try:
            # Create CSV writer for writing checkpoint rows
            writer = csv.writer(checkpointSink)

            # Write the header row to checkpoint sink -- just field names
            writer.writerow(reader.getFieldNames())

            # Determine number of rows to checkpoint
            numToWrite = min(maxRows, totalDataRows)

            # Skip initial rows to get to the rows that we actually need to
            # checkpoint
            numRowsToSkip = totalDataRows - numToWrite
            for _ in xrange(numRowsToSkip):
                reader.next()

            # Write the data rows to checkpoint sink
            numWritten = 0
            while True:
                row = reader.getNextRecord()
                if row is None:
                    break
                writer.writerow([str(element) for element in row])
                numWritten += 1
        finally:
            # Release the read handle even if copying fails part-way
            reader.close()

        assert numWritten == numToWrite, \
            "numWritten ({0!s}) != numToWrite ({1!s})".format(
                numWritten, numToWrite)

        checkpointSink.flush()