def testBadDataset(self):

  filename = _getTempFileName()

  print 'Creating tempfile:', filename

  # Write bad dataset with records going backwards in time
  fields = [FieldMetaInfo('timestamp', FieldMetaType.datetime,
                          FieldMetaSpecial.timestamp)]
  o = FileRecordStream(streamID=filename, write=True, fields=fields)
  # Records
  records = (
    [datetime(day=3, month=3, year=2010)],
    [datetime(day=2, month=3, year=2010)])

  o.appendRecord(records[0])
  o.appendRecord(records[1])
  o.close()

  # Write bad dataset with broken sequences
  fields = [FieldMetaInfo('sid', FieldMetaType.integer,
                          FieldMetaSpecial.sequence)]
  o = FileRecordStream(streamID=filename, write=True, fields=fields)
  # Records
  records = ([1], [2], [1])

  o.appendRecord(records[0])
  o.appendRecord(records[1])
  self.assertRaises(Exception, o.appendRecord, (records[2],))
  o.close()

def test_WeightedMean(self):
  # Cleanup old files
  #for f in glob.glob('*.*'):
  #  if 'auto_specials' in f:
  #    os.remove(f)

  fields = [('dummy1', 'int', ''),
            ('dummy2', 'int', ''),
            ('timestamp', 'datetime', 'T'),
           ]

  records = (
    [10, 1, datetime.datetime(2000, 3, 1)],
    [5, 2, datetime.datetime(2000, 3, 2)],
    [1, 100, datetime.datetime(2000, 3, 3)],
    [2, 4, datetime.datetime(2000, 3, 4)],
    [4, 1, datetime.datetime(2000, 3, 5)],
    [4, 0, datetime.datetime(2000, 3, 6)],
    [5, 0, datetime.datetime(2000, 3, 7)],
    [6, 0, datetime.datetime(2000, 3, 8)],
    )

  if not os.path.isdir('data'):
    os.makedirs('data')

  with FileRecordStream('data/weighted_mean.csv', write=True,
                        fields=fields) as o:
    for r in records:
      o.appendRecord(r)

  # Aggregate dummy1 as a weighted mean (weighted by dummy2) and dummy2 as a
  # plain mean; the special fields are added automatically.
  ai = dict(
    fields=[('dummy1', 'wmean:dummy2', None),
            ('dummy2', 'mean', None)],
    days=2)

  handle = tempfile.NamedTemporaryFile(prefix='weighted_mean',
                                       suffix='.csv',
                                       dir='.')
  tempFile = handle.name
  handle.close()

  outputFile = generateDataset(ai, 'weighted_mean.csv', tempFile)

  result = []
  with FileRecordStream(outputFile) as f:
    print f.getFields()
    for r in f:
      result.append(r)

  self.assertEqual(result[0][0], 6.0)
  self.assertEqual(result[0][1], 1.0)
  self.assertEqual(result[1][0], 1.0)
  self.assertEqual(result[1][1], 52.0)
  self.assertEqual(result[2][0], 4.0)
  self.assertEqual(result[2][1], 0.0)
  self.assertEqual(result[3][0], None)
  self.assertEqual(result[3][1], 0.0)

  return

def testCopyOneRow(self):
  expectedOutput = ("Timestamp,Value\n"
                    "datetime,int\n"
                    "T,\n"
                    "2011-09-04 02:00:00.000000,1\n"
                    "2011-09-04 02:05:00.000000,2\n"
                    "2011-09-04 02:10:00.000000,2\n"
                    "2011-09-04 02:15:00.000000,3\n"
                    "2011-09-04 02:20:00.000000,4\n"
                    "2011-09-04 02:25:00.000000,5\n"
                    "2011-09-04 02:30:00.000000,6\n")
  mockInput = MagicMock(return_value=StringIO(self.sampleInput))
  output = StringIO()
  mockOutput = MagicMock(return_value=output)
  with patch("__builtin__.open", mockInput):
    inputFile = FileRecordStream("input_path")
  with patch("__builtin__.open", mockOutput):
    outputFile = FileRecordStream("output_path",
                                  fields=inputFile.getFields(),
                                  write=True)
    anomalyzer.copy(inputFile, outputFile, 1, 1, 1)
  result = output.getvalue()
  result = result.replace("\r\n", "\n")
  result = result.replace("\r", "\n")
  self.assertSequenceEqual(expectedOutput, result)

def test_AutoSpecialFields(self):
  # Cleanup old files
  #for f in glob.glob('*.*'):
  #  if 'auto_specials' in f:
  #    os.remove(f)

  fields = [('dummy', 'string', ''),
            ('timestamp', 'datetime', 'T'),
            ('reset', 'int', 'R'),
            ('sid', 'int', 'S'),
           ]

  records = (
    ['dummy-1', datetime.datetime(2000, 3, 1), 1, 1],
    ['dummy-2', datetime.datetime(2000, 3, 2), 0, 1],
    ['dummy-3', datetime.datetime(2000, 3, 3), 0, 1],
    ['dummy-4', datetime.datetime(2000, 3, 4), 1, 2],
    ['dummy-5', datetime.datetime(2000, 3, 5), 0, 2],
    )

  if not os.path.isdir('data'):
    os.makedirs('data')

  with FileRecordStream('data/auto_specials.csv', write=True,
                        fields=fields) as o:
    for r in records:
      o.appendRecord(r)

  # Aggregate just the dummy field, all the specials should be added
  ai = dict(fields=[('dummy', lambda x: x[0])], weeks=3)

  handle = tempfile.NamedTemporaryFile(prefix='auto_specials',
                                       suffix='.csv',
                                       dir='.')
  tempFile = handle.name
  handle.close()

  outputFile = generateDataset(ai, 'auto_specials.csv', tempFile)

  result = []
  with FileRecordStream(outputFile) as f:
    print f.getFields()
    for r in f:
      result.append(r)

  self.assertEqual(result[0][2], 1)  # reset
  self.assertEqual(result[0][3], 1)  # seq id
  self.assertEqual(result[0][0], 'dummy-1')
  self.assertEqual(result[1][2], 1)  # reset
  self.assertEqual(result[1][3], 2)  # seq id
  self.assertEqual(result[1][0], 'dummy-4')

  return

def main(args):
  inputPath, outputPath, action = args[:3]
  with FileRecordStream(inputPath) as reader:
    with FileRecordStream(outputPath, write=True,
                          fields=reader.fields) as writer:
      assert action in Actions.ACTIONS, USAGE
      if action == Actions.ADD:
        assert len(args) == 7, USAGE
        start = int(args[4])
        stop = int(args[5])
        column = int(args[3])
        valueType = eval(reader.fields[column][1])
        value = valueType(args[6])
        add(reader, writer, column, start, stop, value)
      elif action == Actions.SCALE:
        assert len(args) == 7, USAGE
        start = int(args[4])
        stop = int(args[5])
        column = int(args[3])
        valueType = eval(reader.fields[column][1])
        multiple = valueType(args[6])
        scale(reader, writer, column, start, stop, multiple)
      elif action == Actions.COPY:
        assert 5 <= len(args) <= 8, USAGE
        start = int(args[3])
        stop = int(args[4])
        if len(args) > 5:
          insertLocation = int(args[5])
        else:
          insertLocation = None
        if len(args) == 7:
          tsCol = int(args[6])
        else:
          tsCol = None
        copy(reader, writer, start, stop, insertLocation, tsCol)
      elif action == Actions.SAMPLE or action == Actions.SAMPLE2:
        assert 4 <= len(args) <= 7, USAGE
        n = int(args[3])
        start = None
        if len(args) > 4:
          start = int(args[4])
        stop = None
        if len(args) > 5:
          stop = int(args[5])
        tsCol = None
        if len(args) > 6:
          tsCol = int(args[6])
        writeSampleOnly = action == Actions.SAMPLE
        sample(reader, writer, n, start, stop, tsCol, writeSampleOnly)

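# Hedged usage sketch (not part of the original module): main() above takes a
# flat argument list rather than sys.argv, so for the "add" action the
# positional layout is [inputPath, outputPath, action, column, start, stop,
# value].  The paths and numbers below are illustrative only:
#
#   main(["input.csv", "output.csv", Actions.ADD, "2", "0", "100", "5.0"])
#
# would add 5.0 to column 2 of rows 0 through 100 of input.csv and write the
# result to output.csv.
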
def _createNetwork():
  """Create network with one RecordSensor region."""
  network = Network()
  network.addRegion('sensor', 'py.RecordSensor', '{}')
  sensorRegion = network.regions['sensor'].getSelf()

  # Add an encoder.
  encoderParams = {'consumption': {'fieldname': 'consumption',
                                   'resolution': 0.88,
                                   'seed': 1,
                                   'name': 'consumption',
                                   'type': 'RandomDistributedScalarEncoder'}}

  encoder = MultiEncoder()
  encoder.addMultipleEncoders(encoderParams)
  sensorRegion.encoder = encoder

  # Add a data source.
  testDir = os.path.dirname(os.path.abspath(__file__))
  inputFile = os.path.join(testDir, 'fixtures', 'gymdata-test.csv')
  dataSource = FileRecordStream(streamID=inputFile)
  sensorRegion.dataSource = dataSource

  # Get and set what field index we want to predict.
  predictedIdx = dataSource.getFieldNames().index('consumption')
  network.regions['sensor'].setParameter('predictedFieldIdx', predictedIdx)

  return network

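# Hedged usage sketch: once _createNetwork() has wired up the data source and
# encoder as above, the returned network can be stepped directly.  'dataOut'
# is assumed to be the RecordSensor output holding the encoded record; treat
# the exact output name as an assumption of this sketch.
#
#   network = _createNetwork()
#   network.run(1)  # feed one record from gymdata-test.csv through the sensor
#   encodedRecord = network.regions['sensor'].getOutputData('dataOut')
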
def _createNetwork():
  """Create a network with a RecordSensor region and an SDRClassifier region."""
  network = Network()
  network.addRegion('sensor', 'py.RecordSensor', '{}')
  network.addRegion('classifier', 'py.SDRClassifierRegion', '{}')
  _createSensorToClassifierLinks(network, 'sensor', 'classifier')

  # Add an encoder to the sensor region.
  sensorRegion = network.regions['sensor'].getSelf()
  encoderParams = {'consumption': {'fieldname': 'consumption',
                                   'resolution': 0.88,
                                   'seed': 1,
                                   'name': 'consumption',
                                   'type': 'RandomDistributedScalarEncoder'}}

  encoder = MultiEncoder()
  encoder.addMultipleEncoders(encoderParams)
  sensorRegion.encoder = encoder

  # Add a data source.
  testDir = os.path.dirname(os.path.abspath(__file__))
  inputFile = os.path.join(testDir, 'fixtures', 'gymdata-test.csv')
  dataSource = FileRecordStream(streamID=inputFile)
  sensorRegion.dataSource = dataSource

  # Set which field we want to predict.
  network.regions['sensor'].setParameter('predictedField', 'consumption')

  return network

def __init__(self, inputFilePath, verbosity=1, numLabels=3,
             spTrainingSize=0, tmTrainingSize=0, clsTrainingSize=0,
             classifierType="KNN"):
  """
  @param inputFilePath   (str)  Path to data formatted for network API.
  @param spTrainingSize  (int)  Number of samples the network has to be
                                trained on before training the spatial pooler.
  @param tmTrainingSize  (int)  Number of samples the network has to be
                                trained on before training the temporal memory.
  @param clsTrainingSize (int)  Number of samples the network has to be
                                trained on before training the classifier.
  @param classifierType  (str)  Either "KNN" or "CLA".
  See ClassificationModel for remaining parameters.
  """
  self.spTrainingSize = spTrainingSize
  self.tmTrainingSize = tmTrainingSize
  self.clsTrainingSize = clsTrainingSize

  super(ClassificationModelHTM, self).__init__(verbosity=verbosity,
                                               numLabels=numLabels)

  # Initialize Network
  self.classifierType = classifierType
  self.recordStream = FileRecordStream(streamID=inputFilePath)
  self.encoder = CioEncoder(cacheDir="./experiments/cache")
  self._initModel()

def aggregate(dataPath, outputPath, days=0, hours=0):
  with FileRecordStream(dataPath) as reader:
    aggregator = Aggregator({'fields': [('messages', 'sum')],
                             'days': days,
                             'hours': hours},
                            reader.getFields())

    with open(outputPath, 'w') as outfile:
      writer = csv.writer(outfile)
      writer.writerow(['timestamp', 'messages'])
      writer.writerow(['datetime', 'int'])
      writer.writerow(['T', ''])

      while True:
        inRecord = reader.getNextRecord()
        bookmark = reader.getBookmark()
        (aggRecord, aggBookmark) = aggregator.next(inRecord, bookmark)

        # Reached EOF?
        if inRecord is None and aggRecord is None:
          break

        if aggRecord is not None:
          timestamp = aggRecord[0].strftime('%Y-%m-%d %H:%M:%S.0')
          writer.writerow([timestamp, aggRecord[1]])

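# Hedged usage sketch for aggregate() above.  The input path is hypothetical;
# the CSV is assumed to use the standard three-row FileRecordStream header
# (field names, types, specials) and to contain 'timestamp' and 'messages'
# columns.
#
#   aggregate('data/messages.csv', 'data/messages_daily.csv', days=1)
#
# buckets the 'messages' field into one-day sums and writes a new two-column
# CSV with one row per aggregated period.
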
def _createLPFNetwork(addSP=True, addTP=False):
  """Create an 'old-style' network ala LPF and return it."""

  # ==========================================================================
  # Create the encoder and data source stuff we need to configure the sensor
  sensorParams = dict(verbosity=_VERBOSITY)
  encoder = _createEncoder()
  trainFile = findDataset("extra/gym/gym.csv")
  dataSource = FileRecordStream(streamID=trainFile)
  dataSource.setAutoRewind(True)

  # Create all the stuff we need to configure the CLARegion
  g_claConfig['spEnable'] = addSP
  g_claConfig['tpEnable'] = addTP
  claParams = _getCLAParams(encoder=encoder, config=g_claConfig)
  claParams['spSeed'] = g_claConfig['spSeed']
  claParams['tpSeed'] = g_claConfig['tpSeed']

  # ==========================================================================
  # Now create the network itself
  n = Network()

  n.addRegion("sensor", "py.RecordSensor", json.dumps(sensorParams))

  sensor = n.regions['sensor'].getSelf()
  sensor.encoder = encoder
  sensor.dataSource = dataSource

  n.addRegion("level1", "py.CLARegion", json.dumps(claParams))

  n.link("sensor", "level1", "UniformLink", "")
  n.link("sensor", "level1", "UniformLink", "",
         srcOutput="resetOut", destInput="resetIn")

  return n

def createAndRunNetwork(testRegionType, testOutputName,
                        checkpointMidway=False, temporalImp=None):
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)

  if temporalImp is None:
    network = createNetwork(dataSource)
  else:
    network = createNetwork(dataSource,
                            enableTP=True,
                            temporalImp=temporalImp)
  network.initialize()

  results = []
  for i in xrange(_NUM_RECORDS):
    if checkpointMidway and i == (_NUM_RECORDS / 2):
      network = saveAndLoadNetwork(network)

    # Run the network for a single iteration
    network.run(1)

    testRegion = network.getRegionsByType(testRegionType)[0]
    output = testRegion.getOutputData(testOutputName).copy()
    results.append(output)

  return results

def _sortChunk(records, key, chunkIndex, fields):
  """Sort an in-memory chunk of records.

  records    - a list of records read from the original dataset
  key        - a list of field indices to sort the records by
  chunkIndex - the index of the current chunk
  fields     - the field metadata used to write the chunk file

  The records contain only the fields requested by the user.

  _sortChunk() writes the sorted records to a file named
  "chunk_<chunk index>.csv" (chunk_0.csv, chunk_1.csv, ...).
  """
  title(additional='(key=%s, chunkIndex=%d)' % (str(key), chunkIndex))

  assert len(records) > 0

  # Sort the current records
  records.sort(key=itemgetter(*key))

  # Write to a chunk file
  if chunkIndex is not None:
    filename = 'chunk_%d.csv' % chunkIndex
    with FileRecordStream(filename, write=True, fields=fields) as o:
      for r in records:
        o.appendRecord(r)

    assert os.path.getsize(filename) > 0

  return records

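# Hedged illustration of how _sortChunk() is driven (the record values and
# field metadata below are made up for the example):
#
#   fields = [('timestamp', 'datetime', 'T'), ('value', 'float', '')]
#   chunk = [[datetime(2010, 3, 2), 1.0], [datetime(2010, 3, 1), 2.0]]
#   _sortChunk(chunk, key=[0], chunkIndex=0, fields=fields)
#
# sorts the chunk by the timestamp column and leaves the result in
# 'chunk_0.csv' for the merge phase implemented by _mergeFiles() further down.
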
def run(numRecords):
  '''
  Run the Bitcoin prediction example.
  '''
  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set the predicted field.
  network.regions["sensor"].getSelf().predictedField = "price"
  network.regions["sensor"].setParameter("predictedField", "price")

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = _RUN_EPOCH  # Run the network, N iterations at a time.
  graph = Graph({
    'title': 'Bitcoin Prediction',
    'y_label': 'price',
    'y_lim': 'auto',
    'prediction_num': 2,
    'line_labels': ['1-step', '5-step']
  })

  for iteration in range(0, numRecords, N):
    if iteration % _RUN_INTERVAL == 0:
      network.run(N)

      price = network.regions["sensor"].getOutputData("sourceOut")[0]

      predictionResults = getPredictionResults(network, "classifier")
      oneStep = predictionResults[1]["predictedValue"]
      oneStepConfidence = predictionResults[1]["predictionConfidence"]
      fiveStep = predictionResults[5]["predictedValue"]
      fiveStepConfidence = predictionResults[5]["predictionConfidence"]

      result = (oneStep, oneStepConfidence * 100,
                fiveStep, fiveStepConfidence * 100)
      if iteration % _PRINT_INTERVAL == 0:
        print "iteration: {}".format(iteration)
        print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
      results.append(result)
      graph.write(price, [oneStep, fiveStep])

  graph.close()

  return results

def test_GenerateDataset(self):
  dataset = 'extra/gym/gym.csv'

  print "Using input dataset: ", dataset

  gymFields = None

  with FileRecordStream(findDataset(dataset)) as f:
    gymFields = f.getFieldNames()

  aggregationOptions = dict(timeField=gymFields.index('timestamp'),
                            fields=[('attendeeCount', sum),
                                    ('consumption', sum),
                                    ('timestamp', lambda x: x[0])],
                            hours=5)

  handle = tempfile.NamedTemporaryFile(
    prefix='agg_gym_hours_5',
    suffix='.csv',
    dir=os.path.dirname(findDataset(dataset)))
  outputFile = handle.name
  handle.close()

  print "Expected outputFile path: ", outputFile

  print "Files in the destination folder before the test:"
  print os.listdir(os.path.abspath(os.path.dirname(findDataset(dataset))))

  if os.path.isfile(outputFile):
    print "Removing existing outputFile: ", outputFile
    os.remove(outputFile)

  self.assertFalse(os.path.exists(outputFile),
                   msg="Shouldn't exist, but does: " + str(outputFile))

  result = generateDataset(aggregationOptions, dataset, outputFile)
  print "generateDataset() returned: ", result

  f1 = os.path.abspath(os.path.normpath(result))
  print "normalized generateDataset() result path: ", f1
  f2 = os.path.normpath(outputFile)
  print "normalized outputFile path: ", f2
  self.assertEqual(f1, f2)

  print "Checking for presence of outputFile: ", outputFile
  self.assertTrue(
    os.path.isfile(outputFile),
    msg="Missing outputFile: %r; normalized generateDataset() result: %r" % (
      outputFile, f1))

  print "Files in the destination folder after the test:"
  print os.listdir(os.path.abspath(os.path.dirname(findDataset(dataset))))

  print result
  print '-' * 30

  return

def initialize(self):
  """Initialize this node."""

  Node.initialize(self)

  # Initialize input bits
  self.bits = []
  for x in range(self.width):
    for y in range(self.height):
      bit = Bit()
      bit.x = x
      bit.y = y
      self.bits.append(bit)

  if self.data_source_type == DataSourceType.FILE:
    # Open the file and place the cursor on the first record.

    # If the file name provided is a relative path, use the project file path
    if self.file_name != '' and os.path.dirname(self.file_name) == '':
      full_file_name = os.path.dirname(Global.project.file_name) + '/' + self.file_name
    else:
      full_file_name = self.file_name

    # Check that the file really exists
    if not os.path.isfile(full_file_name):
      QtWidgets.QMessageBox.warning(
        None, "Warning",
        "Input stream file '" + full_file_name + "' was not found or specified.",
        QtWidgets.QMessageBox.Ok)
      return

    # Create a data source to read the file
    self.data_source = FileRecordStream(full_file_name)
  elif self.data_source_type == DataSourceType.DATABASE:
    pass

  self.encoder = MultiEncoder()
  for encoding in self.encodings:
    encoding.initialize()

    # Create an instance of the encoder class given its module, class name
    # and constructor params
    encoding.encoder = getInstantiatedClass(encoding.encoder_module,
                                            encoding.encoder_class,
                                            encoding.encoder_params)

    # Take the first part of the encoder field name as the encoder name
    # Ex: timestamp_weekend.weekend => timestamp_weekend
    encoding.encoder.name = encoding.encoder_field_name.split('.')[0]

    # Add the sub-encoder to the multi-encoder list
    self.encoder.addEncoder(encoding.data_source_field_name, encoding.encoder)

  # If the encoder size is greater than the sensor size, warn and abort
  encoder_size = self.encoder.getWidth()
  sensor_size = self.width * self.height
  if encoder_size > sensor_size:
    QtWidgets.QMessageBox.warning(
      None, "Warning",
      "'" + self.name + "': Encoder size (" + str(encoder_size) +
      ") is different from sensor size (" + str(self.width) + " x " +
      str(self.height) + " = " + str(sensor_size) + ").",
      QtWidgets.QMessageBox.Ok)
    return

  return True

def initModel(self):
  """
  Initialize the network; self.networkDataPath must already be set.
  """
  recordStream = FileRecordStream(streamID=self.networkDataPath)
  encoder = CioEncoder(cacheDir="./experiments/cache")

  return configureNetwork(recordStream, self.networkConfig, encoder)

def test_GymAggregate(self):
  filename = resource_filename(
    "nupic.datafiles",
    "extra/gym/gym.csv"
  )

  input = []
  gymFields = None

  with FileRecordStream(filename) as f:
    gymFields = f.getFields()
    for i in range(10):
      input.append(f.getNextRecord())

  for h in (1, 3):
    aggregationOptions = dict(
      fields=[
        ('timestamp', lambda x: x[0]),
        ('attendeeCount', sum),
        ('consumption', sum)],
      hours=h
    )

    handle = tempfile.NamedTemporaryFile(prefix='test', suffix='.bin')
    outputFile = handle.name
    handle.close()

    dataInput = DataInputList(input, gymFields)
    dataOutput = DataOutputMyFile(FileRecordStream(outputFile,
                                                   write=True,
                                                   fields=gymFields))

    _aggregate(input=dataInput, options=aggregationOptions,
               timeFieldName='timestamp', output=dataOutput)

    dataOutput.close()

    for r in FileRecordStream(outputFile):
      print(r)
    print('-' * 30)

  return

def testSample(self):
  mockInput = MagicMock(return_value=StringIO(self.sampleInput))
  output = StringIO()
  mockOutput = MagicMock(return_value=output)
  with patch("__builtin__.open", mockInput):
    inputFile = FileRecordStream("input_path")
  with patch("__builtin__.open", mockOutput):
    outputFile = FileRecordStream("output_path",
                                  fields=inputFile.getFields(),
                                  write=True)
    anomalyzer.sample(inputFile, outputFile, 1)
  result = StringIO(output.getvalue())
  # Skip the three header rows (field names, types, specials).
  result.next()
  result.next()
  result.next()
  reader = csv.reader(result)
  _, value = reader.next()
  self.assertIn(int(value), (1, 2, 3, 4, 5, 6))
  self.assertRaises(StopIteration, result.next)

def _mergeFiles(key, chunkCount, outputFile, fields):
  """Merge the sorted chunk files into a single sorted output file.

  key        - a list of field indices to sort the records by
  chunkCount - the number of available chunk files
  outputFile - the name of the sorted output file
  fields     - the field metadata used to write the output file
  """
  title()

  # Open the output file
  with FileRecordStream(outputFile, write=True, fields=fields) as o:
    # Open all chunk files
    files = [FileRecordStream('chunk_%d.csv' % i) for i in range(chunkCount)]
    records = [f.getNextRecord() for f in files]

    # This loop will run until all chunk files are exhausted
    while not all(r is None for r in records):
      # Close and drop files that were exhausted (record is None)
      indices = [i for i, r in enumerate(records) if r is not None]
      for i, r in enumerate(records):
        if r is None:
          files[i].close()
      records = [records[i] for i in indices]
      files = [files[i] for i in indices]

      # Find the smallest current record
      r = min(records, key=itemgetter(*key))

      # Write it to the output file
      o.appendRecord(r)

      # Find the index of the file that produced the current record
      index = records.index(r)

      # Read a new record from that file
      records[index] = files[index].getNextRecord()

  # Close the remaining streams and clean up all chunk files
  for f in files:
    f.close()
  for i in range(chunkCount):
    os.remove('chunk_%d.csv' % i)

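# Hedged sketch of the external sort that _sortChunk() and _mergeFiles()
# implement together (the surrounding driver function is assumed and not
# shown in this excerpt): the driver reads the input in bounded-size chunks,
# calls _sortChunk() on each to produce chunk_<i>.csv, and finally calls
#
#   _mergeFiles(key, chunkCount, outputFile, fields)
#
# which performs a k-way merge: it repeatedly takes the minimum head record
# across the open chunk streams (compared on the same key fields), appends it
# to the output, advances only the stream that supplied it, and removes the
# chunk files once every stream is exhausted.
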
def run():
  """ Run classification network(s) on artificial sensor data """

  with open("network_config_template.json", "rb") as jsonFile:
    templateNetworkConfig = json.load(jsonFile)

  networkConfigurations = generateSampleNetworkConfig(templateNetworkConfig,
                                                      NUM_CATEGORIES)

  for networkConfig in networkConfigurations:
    for noiseAmplitude in WHITE_NOISE_AMPLITUDES:
      for signalMean in SIGNAL_MEANS:
        for signalAmplitude in SIGNAL_AMPLITUDES:
          for signalPeriod in SIGNAL_PERIODS:
            sensorType = networkConfig["sensorRegionConfig"].get(
              "regionType")
            spEnabled = networkConfig["sensorRegionConfig"].get(
              "regionEnabled")
            tmEnabled = networkConfig["tmRegionConfig"].get(
              "regionEnabled")
            upEnabled = networkConfig["tpRegionConfig"].get(
              "regionEnabled")
            classifierType = networkConfig["classifierRegionConfig"].get(
              "regionType")

            expParams = ("RUNNING EXPERIMENT WITH PARAMS:\n"
                         " * numRecords=%s\n"
                         " * signalAmplitude=%s\n"
                         " * signalMean=%s\n"
                         " * signalPeriod=%s\n"
                         " * noiseAmplitude=%s\n"
                         " * sensorType=%s\n"
                         " * spEnabled=%s\n"
                         " * tmEnabled=%s\n"
                         " * tpEnabled=%s\n"
                         " * classifierType=%s\n"
                         ) % (NUM_RECORDS,
                              signalAmplitude,
                              signalMean,
                              signalPeriod,
                              noiseAmplitude,
                              sensorType.split(".")[1],
                              spEnabled,
                              tmEnabled,
                              upEnabled,
                              classifierType.split(".")[1])
            print expParams

            inputFile = generateSensorData(DATA_DIR,
                                           OUTFILE_NAME,
                                           signalMean,
                                           signalPeriod,
                                           SEQUENCE_LENGTH,
                                           NUM_RECORDS,
                                           signalAmplitude,
                                           NUM_CATEGORIES,
                                           noiseAmplitude)

            dataSource = FileRecordStream(streamID=inputFile)
            network = configureNetwork(dataSource, networkConfig)

            partitions = generateNetworkPartitions(networkConfig,
                                                   NUM_RECORDS)

            trainNetwork(network, networkConfig, partitions, NUM_RECORDS)

def _generateScalar(filename="simple.csv", numSequences=2, elementsPerSeq=1,
                    numRepeats=10, stepSize=0.1, includeRandom=False):
  """ Generate a simple dataset. This contains a bunch of non-overlapping
  sequences of scalar values.

  Parameters:
  ----------------------------------------------------
  filename:       name of the file to produce, including extension. It will
                  be created in a 'datasets' sub-directory within the
                  directory containing this script.
  numSequences:   how many sequences to generate
  elementsPerSeq: length of each sequence
  numRepeats:     how many times to repeat each sequence in the output
  stepSize:       how far apart each scalar is
  includeRandom:  if true, include another random field
  """

  # Create the output file
  scriptDir = os.path.dirname(__file__)
  pathname = os.path.join(scriptDir, 'datasets', filename)
  print "Creating %s..." % (pathname)
  fields = [('classification', 'float', ''),
            ('field1', 'float', '')]
  if includeRandom:
    fields += [('randomData', 'float', '')]
  outFile = FileRecordStream(pathname, write=True, fields=fields)

  # Create the sequences
  sequences = []
  for i in range(numSequences):
    seq = [x for x in range(i * elementsPerSeq, (i + 1) * elementsPerSeq)]
    sequences.append(seq)

  random.seed(42)

  # Write out the sequences in random order
  seqIdxs = []
  for i in range(numRepeats):
    seqIdxs += range(numSequences)
  random.shuffle(seqIdxs)

  for seqIdx in seqIdxs:
    seq = sequences[seqIdx]
    for x in seq:
      if includeRandom:
        outFile.appendRecord([seqIdx, x * stepSize, random.random()])
      else:
        outFile.appendRecord([seqIdx, x * stepSize])

  outFile.close()

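# Hedged example invocation of _generateScalar() above.  The file name is
# arbitrary, and the 'datasets' sub-directory is assumed to already exist
# next to the calling script.
#
#   _generateScalar(filename='simple_3seq.csv', numSequences=3,
#                   elementsPerSeq=5, numRepeats=10, stepSize=0.1,
#                   includeRandom=True)
#
# produces 3 * 5 * 10 = 150 rows, each holding the sequence index, the scaled
# element value, and a uniform random value.
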
def _openStream(self, dataUrl, isBlocking, maxTimeout, bookmark,
                firstRecordIdx):
  """Open the underlying file stream.

  This only supports 'file://' prefixed paths.
  """
  self._recordStoreName = findDataset(dataUrl[len(FILE_PREF):])
  self._recordStore = FileRecordStream(streamID=self._recordStoreName,
                                       write=False,
                                       bookmark=bookmark,
                                       firstRecord=firstRecordIdx)

def runDemo():
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = dataSource.getDataRowCount()
  print "Creating network"
  network = createNetwork(dataSource)
  outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_FILE_NAME)
  with open(outputPath, "w") as outputFile:
    writer = csv.writer(outputFile)
    print "Running network"
    print "Writing output to: %s" % outputPath
    runNetwork(network, numRecords, writer)
  print "Hierarchy demo finished"

def test_GymAggregateWithOldData(self):
  filename = resource_filename(
    "nupic.datafiles",
    "extra/gym/gym.csv"
  )

  input = []
  gymFields = None

  with FileRecordStream(filename) as f:
    gymFields = f.getFields()
    for i in range(10):
      input.append(f.getNextRecord())

  # Append some records from the beginning of the dataset to the end, so the
  # aggregator sees old (out-of-order) timestamps
  input.extend(input[0:3])

  for h in (1, 3):
    aggregationOptions = dict(
      fields=[
        ('timestamp', lambda x: x[0]),
        ('attendeeCount', sum),
        ('consumption', sum)],
      hours=h
    )

    handle = tempfile.NamedTemporaryFile(prefix='test', suffix='.bin')
    outputFile = handle.name
    handle.close()

    dataInput = DataInputList(input, gymFields)
    dataOutput = DataOutputList(None)

    _aggregate(input=dataInput, options=aggregationOptions,
               timeFieldName='timestamp', output=dataOutput)

    dataOutput.close()

    outputRecords = dataOutput._store

    timeFieldIdx = [f[0] for f in gymFields].index('timestamp')

    diffs = []
    for i in range(1, len(outputRecords)):
      diffs.append(outputRecords[i][timeFieldIdx] -
                   outputRecords[i - 1][timeFieldIdx])
    positiveTimeFlow = list(map((lambda x: x < datetime.timedelta(seconds=0)),
                                diffs))
    # Make sure that the old records show up in the aggregated output and, at
    # the same time, that they are in consecutive order after being inserted
    self.assertEqual(sum(positiveTimeFlow), 1)

  return

def testFileRecordStreamReadData(self):
  ndg = NetworkDataGenerator()
  filename = os.path.join(self.dirName, "test_data/multi_sample.csv")
  ndg.split(filename, 3, False)

  dataOutputFile = os.path.join(
    self.dirName, "test_data/multi_sample_split.csv")
  categoriesOutputFile = os.path.join(
    self.dirName, "test_data/multi_sample_categories.json")
  ndg.saveData(dataOutputFile, categoriesOutputFile)

  # If no error is raised, then the data is in the correct format
  frs = FileRecordStream(dataOutputFile)

def run(numRecords):
  '''
  Run the sine wave example.
  '''
  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set the predicted field.
  network.regions["sensor"].getSelf().predictedField = "sine"
  network.regions["sensor"].setParameter("predictedField", "sine")

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = 1  # Run the network, N iterations at a time.
  output = nupic_output.NuPICPlotOutput("Sine", show_anomaly_score=True)
  for iteration in range(0, numRecords, N):
    network.run(N)

    sine = network.regions["sensor"].getOutputData("sourceOut")[0]

    predictionResults = getPredictionResults(network, "classifier")
    oneStep = predictionResults[1]["predictedValue"]
    oneStepConfidence = predictionResults[1]["predictionConfidence"]
    tenStep = predictionResults[10]["predictedValue"]
    tenStepConfidence = predictionResults[10]["predictionConfidence"]

    result = (oneStep, oneStepConfidence * 100,
              tenStep, tenStepConfidence * 100)
    print "1-step: {:16} ({:4.4}%)\t 10-step: {:16} ({:4.4}%)".format(*result)
    results.append(result)
    output.write(sine, oneStep, 0)

  output.close()

  return results

def _openStream(self, dataUrl, isBlocking, maxTimeout, bookmark,
                firstRecordIdx):
  """Open the underlying file stream.

  This only supports 'file://' prefixed paths.
  """
  filePath = dataUrl[len(FILE_PREF):]
  if not os.path.isabs(filePath):
    filePath = os.path.join(os.getcwd(), filePath)
  self._recordStoreName = filePath
  self._recordStore = FileRecordStream(streamID=self._recordStoreName,
                                       write=False,
                                       bookmark=bookmark,
                                       firstRecord=firstRecordIdx)

def _generateScalar(filename="simple.csv", numSequences=2, elementsPerSeq=1,
                    numRepeats=10, stepSize=0.1, resets=False):
  """ Generate a simple dataset. This contains a bunch of non-overlapping
  sequences of scalar values.

  Parameters:
  ----------------------------------------------------
  filename:       name of the file to produce, including extension. It will
                  be created in a 'datasets' sub-directory within the
                  directory containing this script.
  numSequences:   how many sequences to generate
  elementsPerSeq: length of each sequence
  numRepeats:     how many times to repeat each sequence in the output
  stepSize:       how far apart each scalar is
  resets:         if True, turn on reset at start of each sequence
  """

  # Create the output file
  scriptDir = os.path.dirname(__file__)
  pathname = os.path.join(scriptDir, 'datasets', filename)
  print("Creating %s..." % (pathname))
  fields = [('reset', 'int', 'R'),
            ('category', 'int', 'C'),
            ('field1', 'float', '')]
  outFile = FileRecordStream(pathname, write=True, fields=fields)

  # Create the sequences
  sequences = []
  for i in range(numSequences):
    seq = [x for x in range(i * elementsPerSeq, (i + 1) * elementsPerSeq)]
    sequences.append(seq)

  # Write out the sequences in random order
  seqIdxs = []
  for i in range(numRepeats):
    seqIdxs += list(range(numSequences))
  random.shuffle(seqIdxs)

  for seqIdx in seqIdxs:
    reset = int(resets)
    seq = sequences[seqIdx]
    for x in seq:
      outFile.appendRecord([reset, str(seqIdx), x * stepSize])
      reset = 0

  outFile.close()

def writeTestFile(testFile, fields, big):
  if big:
    print 'Creating big test file (763MB)...'
    payload = 'x' * 10 ** 8
  else:
    print 'Creating small test file...'
    payload = 'x' * 3
  with FileRecordStream(testFile, write=True, fields=fields) as o:
    print '.'; o.appendRecord([1, 3, 6, payload])
    print '.'; o.appendRecord([2, 3, 6, payload])
    print '.'; o.appendRecord([1, 4, 6, payload])
    print '.'; o.appendRecord([2, 4, 6, payload])
    print '.'; o.appendRecord([1, 3, 5, payload])
    print '.'; o.appendRecord([2, 3, 5, payload])
    print '.'; o.appendRecord([1, 4, 5, payload])
    print '.'; o.appendRecord([2, 4, 5, payload])

def testSaveAndReload(self):
  """
  This function tests saving and loading. It will train a network for 500
  iterations, then save it and reload it as a second network instance. It
  will then run both networks for 100 iterations and ensure they return
  identical results.
  """

  print "Creating network..."

  netOPF = _createOPFNetwork()
  level1OPF = netOPF.regions['level1SP']

  # ==========================================================================
  print "Training network for 500 iterations"
  level1OPF.setParameter('learningMode', 1)
  level1OPF.setParameter('inferenceMode', 0)
  netOPF.run(500)
  level1OPF.setParameter('learningMode', 0)
  level1OPF.setParameter('inferenceMode', 1)

  # ==========================================================================
  # Save network and reload as a second instance. We need to reset the data
  # source for the unsaved network so that both instances start at the same
  # place
  print "Saving and reload network"
  _, tmpNetworkFilename = _setupTempDirectory("trained.nta")
  netOPF.save(tmpNetworkFilename)
  netOPF2 = Network(tmpNetworkFilename)
  level1OPF2 = netOPF2.regions['level1SP']

  sensor = netOPF.regions['sensor'].getSelf()
  trainFile = resource_filename("nupic.datafiles", "extra/gym/gym.csv")
  sensor.dataSource = FileRecordStream(streamID=trainFile)
  sensor.dataSource.setAutoRewind(True)

  # ==========================================================================
  print "Running inference on the two networks for 100 iterations"
  for _ in xrange(100):
    netOPF2.run(1)
    netOPF.run(1)

    l1outputOPF2 = level1OPF2.getOutputData("bottomUpOut")
    l1outputOPF = level1OPF.getOutputData("bottomUpOut")
    opfHash2 = l1outputOPF2.nonzero()[0].sum()
    opfHash = l1outputOPF.nonzero()[0].sum()

    self.assertEqual(opfHash2, opfHash)