def testBadDataset(self):

  filename = _getTempFileName()

  print 'Creating tempfile:', filename

  # Write bad dataset with records going backwards in time
  fields = [FieldMetaInfo('timestamp', FieldMetaType.datetime,
                          FieldMetaSpecial.timestamp)]
  o = FileRecordStream(streamID=filename, write=True, fields=fields)
  # Records
  records = (
    [datetime(day=3, month=3, year=2010)],
    [datetime(day=2, month=3, year=2010)])

  o.appendRecord(records[0])
  o.appendRecord(records[1])
  o.close()

  # Write bad dataset with broken sequences
  fields = [FieldMetaInfo('sid', FieldMetaType.integer,
                          FieldMetaSpecial.sequence)]
  o = FileRecordStream(streamID=filename, write=True, fields=fields)
  # Records
  records = ([1], [2], [1])

  o.appendRecord(records[0])
  o.appendRecord(records[1])
  self.assertRaises(Exception, o.appendRecord, (records[2],))
  o.close()

def test_WeightedMean(self):
  # Cleanup old files
  #for f in glob.glob('*.*'):
  #  if 'auto_specials' in f:
  #    os.remove(f)

  fields = [('dummy1', 'int', ''),
            ('dummy2', 'int', ''),
            ('timestamp', 'datetime', 'T'),
           ]

  records = (
    [10, 1, datetime.datetime(2000, 3, 1)],
    [5, 2, datetime.datetime(2000, 3, 2)],
    [1, 100, datetime.datetime(2000, 3, 3)],
    [2, 4, datetime.datetime(2000, 3, 4)],
    [4, 1, datetime.datetime(2000, 3, 5)],
    [4, 0, datetime.datetime(2000, 3, 6)],
    [5, 0, datetime.datetime(2000, 3, 7)],
    [6, 0, datetime.datetime(2000, 3, 8)],
    )

  if not os.path.isdir('data'):
    os.makedirs('data')

  with FileRecordStream('data/weighted_mean.csv', write=True,
                        fields=fields) as o:
    for r in records:
      o.appendRecord(r)

  # Aggregate dummy1 as a weighted mean (weighted by dummy2) and dummy2 as a
  # plain mean; the special fields are added automatically.
  ai = dict(
    fields=[('dummy1', 'wmean:dummy2', None),
            ('dummy2', 'mean', None)],
    days=2)

  handle = tempfile.NamedTemporaryFile(prefix='weighted_mean',
                                       suffix='.csv',
                                       dir='.')
  tempFile = handle.name
  handle.close()

  outputFile = generateDataset(ai, 'weighted_mean.csv', tempFile)

  result = []
  with FileRecordStream(outputFile) as f:
    print f.getFields()
    for r in f:
      result.append(r)

  self.assertEqual(result[0][0], 6.0)
  self.assertEqual(result[0][1], 1.0)
  self.assertEqual(result[1][0], 1.0)
  self.assertEqual(result[1][1], 52.0)
  self.assertEqual(result[2][0], 4.0)
  self.assertEqual(result[2][1], 0.0)
  self.assertEqual(result[3][0], None)
  self.assertEqual(result[3][1], 0.0)

  return

def testCopyOneRow(self):
  expectedOutput = ("Timestamp,Value\n"
                    "datetime,int\n"
                    "T,\n"
                    "2011-09-04 02:00:00.000000,1\n"
                    "2011-09-04 02:05:00.000000,2\n"
                    "2011-09-04 02:10:00.000000,2\n"
                    "2011-09-04 02:15:00.000000,3\n"
                    "2011-09-04 02:20:00.000000,4\n"
                    "2011-09-04 02:25:00.000000,5\n"
                    "2011-09-04 02:30:00.000000,6\n")
  mockInput = MagicMock(return_value=StringIO(self.sampleInput))
  output = StringIO()
  mockOutput = MagicMock(return_value=output)
  with patch("__builtin__.open", mockInput):
    inputFile = FileRecordStream("input_path")
  with patch("__builtin__.open", mockOutput):
    outputFile = FileRecordStream("output_path",
                                  fields=inputFile.getFields(),
                                  write=True)
    anomalyzer.copy(inputFile, outputFile, 1, 1, 1)
  result = output.getvalue()
  result = result.replace("\r\n", "\n")
  result = result.replace("\r", "\n")
  self.assertSequenceEqual(expectedOutput, result)

def test_AutoSpecialFields(self):
  # Cleanup old files
  #for f in glob.glob('*.*'):
  #  if 'auto_specials' in f:
  #    os.remove(f)

  fields = [('dummy', 'string', ''),
            ('timestamp', 'datetime', 'T'),
            ('reset', 'int', 'R'),
            ('sid', 'int', 'S'),
           ]

  records = (
    ['dummy-1', datetime.datetime(2000, 3, 1), 1, 1],
    ['dummy-2', datetime.datetime(2000, 3, 2), 0, 1],
    ['dummy-3', datetime.datetime(2000, 3, 3), 0, 1],
    ['dummy-4', datetime.datetime(2000, 3, 4), 1, 2],
    ['dummy-5', datetime.datetime(2000, 3, 5), 0, 2],
    )

  if not os.path.isdir('data'):
    os.makedirs('data')

  with FileRecordStream('data/auto_specials.csv', write=True,
                        fields=fields) as o:
    for r in records:
      o.appendRecord(r)

  # Aggregate just the dummy field, all the specials should be added
  ai = dict(fields=[('dummy', lambda x: x[0])], weeks=3)

  handle = tempfile.NamedTemporaryFile(prefix='auto_specials',
                                       suffix='.csv',
                                       dir='.')
  tempFile = handle.name
  handle.close()

  outputFile = generateDataset(ai, 'auto_specials.csv', tempFile)

  result = []
  with FileRecordStream(outputFile) as f:
    print f.getFields()
    for r in f:
      result.append(r)

  self.assertEqual(result[0][2], 1)  # reset
  self.assertEqual(result[0][3], 1)  # seq id
  self.assertEqual(result[0][0], 'dummy-1')
  self.assertEqual(result[1][2], 1)  # reset
  self.assertEqual(result[1][3], 2)  # seq id
  self.assertEqual(result[1][0], 'dummy-4')

  return

def main(args):
  inputPath, outputPath, action = args[:3]
  with FileRecordStream(inputPath) as reader:
    with FileRecordStream(outputPath, write=True,
                          fields=reader.fields) as writer:
      assert action in Actions.ACTIONS, USAGE
      if action == Actions.ADD:
        assert len(args) == 7, USAGE
        start = int(args[4])
        stop = int(args[5])
        column = int(args[3])
        valueType = eval(reader.fields[column][1])
        value = valueType(args[6])
        add(reader, writer, column, start, stop, value)
      elif action == Actions.SCALE:
        assert len(args) == 7, USAGE
        start = int(args[4])
        stop = int(args[5])
        column = int(args[3])
        valueType = eval(reader.fields[column][1])
        multiple = valueType(args[6])
        scale(reader, writer, column, start, stop, multiple)
      elif action == Actions.COPY:
        assert 5 <= len(args) <= 8, USAGE
        start = int(args[3])
        stop = int(args[4])
        if len(args) > 5:
          insertLocation = int(args[5])
        else:
          insertLocation = None
        if len(args) == 7:
          tsCol = int(args[6])
        else:
          tsCol = None
        copy(reader, writer, start, stop, insertLocation, tsCol)
      elif action == Actions.SAMPLE or action == Actions.SAMPLE2:
        assert 4 <= len(args) <= 7, USAGE
        n = int(args[3])
        start = None
        if len(args) > 4:
          start = int(args[4])
        stop = None
        if len(args) > 5:
          stop = int(args[5])
        tsCol = None
        if len(args) > 6:
          tsCol = int(args[6])
        writeSampleOnly = action == Actions.SAMPLE
        sample(reader, writer, n, start, stop, tsCol, writeSampleOnly)

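# Hedged usage sketch (not part of the original module): main() above takes a
# flat argument list rather than sys.argv, so for the "add" action the
# positional layout is [inputPath, outputPath, action, column, start, stop,
# value].  The paths and numbers below are illustrative only:
#
#   main(["input.csv", "output.csv", Actions.ADD, "2", "0", "100", "5.0"])
#
# would add 5.0 to column 2 of rows 0 through 100 of input.csv and write the
# result to output.csv.
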
def _createNetwork():
  """Create network with one RecordSensor region."""
  network = Network()
  network.addRegion('sensor', 'py.RecordSensor', '{}')
  sensorRegion = network.regions['sensor'].getSelf()

  # Add an encoder.
  encoderParams = {'consumption': {'fieldname': 'consumption',
                                   'resolution': 0.88,
                                   'seed': 1,
                                   'name': 'consumption',
                                   'type': 'RandomDistributedScalarEncoder'}}

  encoder = MultiEncoder()
  encoder.addMultipleEncoders(encoderParams)
  sensorRegion.encoder = encoder

  # Add a data source.
  testDir = os.path.dirname(os.path.abspath(__file__))
  inputFile = os.path.join(testDir, 'fixtures', 'gymdata-test.csv')
  dataSource = FileRecordStream(streamID=inputFile)
  sensorRegion.dataSource = dataSource

  # Get and set what field index we want to predict.
  predictedIdx = dataSource.getFieldNames().index('consumption')
  network.regions['sensor'].setParameter('predictedFieldIdx', predictedIdx)

  return network

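# Hedged usage sketch: once _createNetwork() has wired up the data source and
# encoder as above, the returned network can be stepped directly.  'dataOut'
# is assumed to be the RecordSensor output holding the encoded record; treat
# the exact output name as an assumption of this sketch.
#
#   network = _createNetwork()
#   network.run(1)  # feed one record from gymdata-test.csv through the sensor
#   encodedRecord = network.regions['sensor'].getOutputData('dataOut')
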
def _createNetwork():
  """Create a network with a RecordSensor region and an SDRClassifier region."""
  network = Network()
  network.addRegion('sensor', 'py.RecordSensor', '{}')
  network.addRegion('classifier', 'py.SDRClassifierRegion', '{}')
  _createSensorToClassifierLinks(network, 'sensor', 'classifier')

  # Add an encoder to the sensor region.
  sensorRegion = network.regions['sensor'].getSelf()
  encoderParams = {'consumption': {'fieldname': 'consumption',
                                   'resolution': 0.88,
                                   'seed': 1,
                                   'name': 'consumption',
                                   'type': 'RandomDistributedScalarEncoder'}}

  encoder = MultiEncoder()
  encoder.addMultipleEncoders(encoderParams)
  sensorRegion.encoder = encoder

  # Add a data source.
  testDir = os.path.dirname(os.path.abspath(__file__))
  inputFile = os.path.join(testDir, 'fixtures', 'gymdata-test.csv')
  dataSource = FileRecordStream(streamID=inputFile)
  sensorRegion.dataSource = dataSource

  # Set which field we want to predict.
  network.regions['sensor'].setParameter('predictedField', 'consumption')

  return network

def __init__(self, inputFilePath, verbosity=1, numLabels=3,
             spTrainingSize=0, tmTrainingSize=0, clsTrainingSize=0,
             classifierType="KNN"):
  """
  @param inputFilePath   (str)  Path to data formatted for network API.
  @param spTrainingSize  (int)  Number of samples the network has to be
                                trained on before training the spatial pooler.
  @param tmTrainingSize  (int)  Number of samples the network has to be
                                trained on before training the temporal memory.
  @param clsTrainingSize (int)  Number of samples the network has to be
                                trained on before training the classifier.
  @param classifierType  (str)  Either "KNN" or "CLA".
  See ClassificationModel for remaining parameters.
  """
  self.spTrainingSize = spTrainingSize
  self.tmTrainingSize = tmTrainingSize
  self.clsTrainingSize = clsTrainingSize

  super(ClassificationModelHTM, self).__init__(verbosity=verbosity,
                                               numLabels=numLabels)

  # Initialize Network
  self.classifierType = classifierType
  self.recordStream = FileRecordStream(streamID=inputFilePath)
  self.encoder = CioEncoder(cacheDir="./experiments/cache")
  self._initModel()

def aggregate(dataPath, outputPath, days=0, hours=0):
  with FileRecordStream(dataPath) as reader:
    aggregator = Aggregator({'fields': [('messages', 'sum')],
                             'days': days,
                             'hours': hours},
                            reader.getFields())

    with open(outputPath, 'w') as outfile:
      writer = csv.writer(outfile)
      writer.writerow(['timestamp', 'messages'])
      writer.writerow(['datetime', 'int'])
      writer.writerow(['T', ''])

      while True:
        inRecord = reader.getNextRecord()
        bookmark = reader.getBookmark()
        (aggRecord, aggBookmark) = aggregator.next(inRecord, bookmark)

        # Reached EOF?
        if inRecord is None and aggRecord is None:
          break

        if aggRecord is not None:
          timestamp = aggRecord[0].strftime('%Y-%m-%d %H:%M:%S.0')
          writer.writerow([timestamp, aggRecord[1]])

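# Hedged usage sketch for aggregate() above.  The input path is hypothetical;
# the CSV is assumed to use the standard three-row FileRecordStream header
# (field names, types, specials) and to contain 'timestamp' and 'messages'
# columns.
#
#   aggregate('data/messages.csv', 'data/messages_daily.csv', days=1)
#
# buckets the 'messages' field into one-day sums and writes a new two-column
# CSV with one row per aggregated period.
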
def _createLPFNetwork(addSP=True, addTP=False):
  """Create an 'old-style' network ala LPF and return it."""

  # ==========================================================================
  # Create the encoder and data source stuff we need to configure the sensor
  sensorParams = dict(verbosity=_VERBOSITY)
  encoder = _createEncoder()
  trainFile = findDataset("extra/gym/gym.csv")
  dataSource = FileRecordStream(streamID=trainFile)
  dataSource.setAutoRewind(True)

  # Create all the stuff we need to configure the CLARegion
  g_claConfig['spEnable'] = addSP
  g_claConfig['tpEnable'] = addTP
  claParams = _getCLAParams(encoder=encoder, config=g_claConfig)
  claParams['spSeed'] = g_claConfig['spSeed']
  claParams['tpSeed'] = g_claConfig['tpSeed']

  # ==========================================================================
  # Now create the network itself
  n = Network()

  n.addRegion("sensor", "py.RecordSensor", json.dumps(sensorParams))

  sensor = n.regions['sensor'].getSelf()
  sensor.encoder = encoder
  sensor.dataSource = dataSource

  n.addRegion("level1", "py.CLARegion", json.dumps(claParams))

  n.link("sensor", "level1", "UniformLink", "")
  n.link("sensor", "level1", "UniformLink", "",
         srcOutput="resetOut", destInput="resetIn")

  return n

def createAndRunNetwork(testRegionType, testOutputName,
                        checkpointMidway=False, temporalImp=None):
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)

  if temporalImp is None:
    network = createNetwork(dataSource)
  else:
    network = createNetwork(dataSource,
                            enableTP=True,
                            temporalImp=temporalImp)
  network.initialize()

  results = []
  for i in xrange(_NUM_RECORDS):
    if checkpointMidway and i == (_NUM_RECORDS / 2):
      network = saveAndLoadNetwork(network)

    # Run the network for a single iteration
    network.run(1)

    testRegion = network.getRegionsByType(testRegionType)[0]
    output = testRegion.getOutputData(testOutputName).copy()
    results.append(output)

  return results

def _sortChunk(records, key, chunkIndex, fields):
  """Sort an in-memory chunk of records.

  records    - a list of records read from the original dataset
  key        - a list of field indices to sort the records by
  chunkIndex - the index of the current chunk
  fields     - the field metadata used to write the chunk file

  The records contain only the fields requested by the user.

  _sortChunk() writes the sorted records to a file named
  "chunk_<chunk index>.csv" (chunk_0.csv, chunk_1.csv, ...).
  """
  title(additional='(key=%s, chunkIndex=%d)' % (str(key), chunkIndex))

  assert len(records) > 0

  # Sort the current records
  records.sort(key=itemgetter(*key))

  # Write to a chunk file
  if chunkIndex is not None:
    filename = 'chunk_%d.csv' % chunkIndex
    with FileRecordStream(filename, write=True, fields=fields) as o:
      for r in records:
        o.appendRecord(r)

    assert os.path.getsize(filename) > 0

  return records

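# Hedged illustration of how _sortChunk() is driven (the record values and
# field metadata below are made up for the example):
#
#   fields = [('timestamp', 'datetime', 'T'), ('value', 'float', '')]
#   chunk = [[datetime(2010, 3, 2), 1.0], [datetime(2010, 3, 1), 2.0]]
#   _sortChunk(chunk, key=[0], chunkIndex=0, fields=fields)
#
# sorts the chunk by the timestamp column and leaves the result in
# 'chunk_0.csv' for the merge phase implemented by _mergeFiles() further down.
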
def run(numRecords):
  '''
  Run the Bitcoin prediction example.
  '''
  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set the predicted field.
  network.regions["sensor"].getSelf().predictedField = "price"
  network.regions["sensor"].setParameter("predictedField", "price")

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = _RUN_EPOCH  # Run the network, N iterations at a time.
  graph = Graph({
    'title': 'Bitcoin Prediction',
    'y_label': 'price',
    'y_lim': 'auto',
    'prediction_num': 2,
    'line_labels': ['1-step', '5-step']
  })

  for iteration in range(0, numRecords, N):
    if iteration % _RUN_INTERVAL == 0:
      network.run(N)

      price = network.regions["sensor"].getOutputData("sourceOut")[0]

      predictionResults = getPredictionResults(network, "classifier")
      oneStep = predictionResults[1]["predictedValue"]
      oneStepConfidence = predictionResults[1]["predictionConfidence"]
      fiveStep = predictionResults[5]["predictedValue"]
      fiveStepConfidence = predictionResults[5]["predictionConfidence"]

      result = (oneStep, oneStepConfidence * 100,
                fiveStep, fiveStepConfidence * 100)
      if iteration % _PRINT_INTERVAL == 0:
        print "iteration: {}".format(iteration)
        print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
      results.append(result)
      graph.write(price, [oneStep, fiveStep])

  graph.close()

  return results

def test_GenerateDataset(self):
  dataset = 'extra/gym/gym.csv'

  print "Using input dataset: ", dataset

  gymFields = None

  with FileRecordStream(findDataset(dataset)) as f:
    gymFields = f.getFieldNames()

  aggregationOptions = dict(timeField=gymFields.index('timestamp'),
                            fields=[('attendeeCount', sum),
                                    ('consumption', sum),
                                    ('timestamp', lambda x: x[0])],
                            hours=5)

  handle = tempfile.NamedTemporaryFile(
    prefix='agg_gym_hours_5',
    suffix='.csv',
    dir=os.path.dirname(findDataset(dataset)))
  outputFile = handle.name
  handle.close()

  print "Expected outputFile path: ", outputFile

  print "Files in the destination folder before the test:"
  print os.listdir(os.path.abspath(os.path.dirname(findDataset(dataset))))

  if os.path.isfile(outputFile):
    print "Removing existing outputFile: ", outputFile
    os.remove(outputFile)

  self.assertFalse(os.path.exists(outputFile),
                   msg="Shouldn't exist, but does: " + str(outputFile))

  result = generateDataset(aggregationOptions, dataset, outputFile)
  print "generateDataset() returned: ", result

  f1 = os.path.abspath(os.path.normpath(result))
  print "normalized generateDataset() result path: ", f1
  f2 = os.path.normpath(outputFile)
  print "normalized outputFile path: ", f2
  self.assertEqual(f1, f2)

  print "Checking for presence of outputFile: ", outputFile
  self.assertTrue(
    os.path.isfile(outputFile),
    msg="Missing outputFile: %r; normalized generateDataset() result: %r" % (
      outputFile, f1))

  print "Files in the destination folder after the test:"
  print os.listdir(os.path.abspath(os.path.dirname(findDataset(dataset))))

  print result
  print '-' * 30

  return

def initialize(self):
  """Initialize this node."""

  Node.initialize(self)

  # Initialize input bits
  self.bits = []
  for x in range(self.width):
    for y in range(self.height):
      bit = Bit()
      bit.x = x
      bit.y = y
      self.bits.append(bit)

  if self.data_source_type == DataSourceType.FILE:
    # Open the file and place the cursor on the first record.

    # If the file name provided is a relative path, use the project file path
    if self.file_name != '' and os.path.dirname(self.file_name) == '':
      full_file_name = os.path.dirname(Global.project.file_name) + '/' + self.file_name
    else:
      full_file_name = self.file_name

    # Check that the file really exists
    if not os.path.isfile(full_file_name):
      QtWidgets.QMessageBox.warning(
        None, "Warning",
        "Input stream file '" + full_file_name + "' was not found or specified.",
        QtWidgets.QMessageBox.Ok)
      return

    # Create a data source to read the file
    self.data_source = FileRecordStream(full_file_name)
  elif self.data_source_type == DataSourceType.DATABASE:
    pass

  self.encoder = MultiEncoder()
  for encoding in self.encodings:
    encoding.initialize()

    # Create an instance of the encoder class given its module, class name
    # and constructor params
    encoding.encoder = getInstantiatedClass(encoding.encoder_module,
                                            encoding.encoder_class,
                                            encoding.encoder_params)

    # Take the first part of the encoder field name as the encoder name
    # Ex: timestamp_weekend.weekend => timestamp_weekend
    encoding.encoder.name = encoding.encoder_field_name.split('.')[0]

    # Add the sub-encoder to the multi-encoder list
    self.encoder.addEncoder(encoding.data_source_field_name, encoding.encoder)

  # If the encoder size is greater than the sensor size, warn and abort
  encoder_size = self.encoder.getWidth()
  sensor_size = self.width * self.height
  if encoder_size > sensor_size:
    QtWidgets.QMessageBox.warning(
      None, "Warning",
      "'" + self.name + "': Encoder size (" + str(encoder_size) +
      ") is different from sensor size (" + str(self.width) + " x " +
      str(self.height) + " = " + str(sensor_size) + ").",
      QtWidgets.QMessageBox.Ok)
    return

  return True

def initModel(self):
  """
  Initialize the network; self.networkDataPath must already be set.
  """
  recordStream = FileRecordStream(streamID=self.networkDataPath)
  encoder = CioEncoder(cacheDir="./experiments/cache")

  return configureNetwork(recordStream, self.networkConfig, encoder)

def test_GymAggregate(self):
  filename = resource_filename(
    "nupic.datafiles",
    "extra/gym/gym.csv"
  )

  input = []
  gymFields = None

  with FileRecordStream(filename) as f:
    gymFields = f.getFields()
    for i in range(10):
      input.append(f.getNextRecord())

  for h in (1, 3):
    aggregationOptions = dict(
      fields=[
        ('timestamp', lambda x: x[0]),
        ('attendeeCount', sum),
        ('consumption', sum)],
      hours=h
    )

    handle = tempfile.NamedTemporaryFile(prefix='test', suffix='.bin')
    outputFile = handle.name
    handle.close()

    dataInput = DataInputList(input, gymFields)
    dataOutput = DataOutputMyFile(FileRecordStream(outputFile,
                                                   write=True,
                                                   fields=gymFields))

    _aggregate(input=dataInput, options=aggregationOptions,
               timeFieldName='timestamp', output=dataOutput)

    dataOutput.close()

    for r in FileRecordStream(outputFile):
      print(r)
    print('-' * 30)

  return

def testSample(self):
  mockInput = MagicMock(return_value=StringIO(self.sampleInput))
  output = StringIO()
  mockOutput = MagicMock(return_value=output)
  with patch("__builtin__.open", mockInput):
    inputFile = FileRecordStream("input_path")
  with patch("__builtin__.open", mockOutput):
    outputFile = FileRecordStream("output_path",
                                  fields=inputFile.getFields(),
                                  write=True)
    anomalyzer.sample(inputFile, outputFile, 1)
  result = StringIO(output.getvalue())
  # Skip the three header rows (field names, types, specials).
  result.next()
  result.next()
  result.next()
  reader = csv.reader(result)
  _, value = reader.next()
  self.assertIn(int(value), (1, 2, 3, 4, 5, 6))
  self.assertRaises(StopIteration, result.next)

def _mergeFiles(key, chunkCount, outputFile, fields):
  """Merge the sorted chunk files into a single sorted output file.

  key        - a list of field indices to sort the records by
  chunkCount - the number of available chunk files
  outputFile - the name of the sorted output file
  fields     - the field metadata used to write the output file
  """
  title()

  # Open the output file
  with FileRecordStream(outputFile, write=True, fields=fields) as o:
    # Open all chunk files
    files = [FileRecordStream('chunk_%d.csv' % i) for i in range(chunkCount)]
    records = [f.getNextRecord() for f in files]

    # This loop will run until all chunk files are exhausted
    while not all(r is None for r in records):
      # Close and drop files that were exhausted (record is None)
      indices = [i for i, r in enumerate(records) if r is not None]
      for i, r in enumerate(records):
        if r is None:
          files[i].close()
      records = [records[i] for i in indices]
      files = [files[i] for i in indices]

      # Find the smallest current record
      r = min(records, key=itemgetter(*key))

      # Write it to the output file
      o.appendRecord(r)

      # Find the index of the file that produced the current record
      index = records.index(r)

      # Read a new record from that file
      records[index] = files[index].getNextRecord()

  # Close the remaining streams and clean up all chunk files
  for f in files:
    f.close()
  for i in range(chunkCount):
    os.remove('chunk_%d.csv' % i)

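# Hedged sketch of the external sort that _sortChunk() and _mergeFiles()
# implement together (the surrounding driver function is assumed and not
# shown in this excerpt): the driver reads the input in bounded-size chunks,
# calls _sortChunk() on each to produce chunk_<i>.csv, and finally calls
#
#   _mergeFiles(key, chunkCount, outputFile, fields)
#
# which performs a k-way merge: it repeatedly takes the minimum head record
# across the open chunk streams (compared on the same key fields), appends it
# to the output, advances only the stream that supplied it, and removes the
# chunk files once every stream is exhausted.
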
def run():
  """ Run classification network(s) on artificial sensor data """

  with open("network_config_template.json", "rb") as jsonFile:
    templateNetworkConfig = json.load(jsonFile)

  networkConfigurations = generateSampleNetworkConfig(templateNetworkConfig,
                                                      NUM_CATEGORIES)

  for networkConfig in networkConfigurations:
    for noiseAmplitude in WHITE_NOISE_AMPLITUDES:
      for signalMean in SIGNAL_MEANS:
        for signalAmplitude in SIGNAL_AMPLITUDES:
          for signalPeriod in SIGNAL_PERIODS:
            sensorType = networkConfig["sensorRegionConfig"].get(
              "regionType")
            spEnabled = networkConfig["sensorRegionConfig"].get(
              "regionEnabled")
            tmEnabled = networkConfig["tmRegionConfig"].get(
              "regionEnabled")
            upEnabled = networkConfig["tpRegionConfig"].get(
              "regionEnabled")
            classifierType = networkConfig["classifierRegionConfig"].get(
              "regionType")

            expParams = ("RUNNING EXPERIMENT WITH PARAMS:\n"
                         " * numRecords=%s\n"
                         " * signalAmplitude=%s\n"
                         " * signalMean=%s\n"
                         " * signalPeriod=%s\n"
                         " * noiseAmplitude=%s\n"
                         " * sensorType=%s\n"
                         " * spEnabled=%s\n"
                         " * tmEnabled=%s\n"
                         " * tpEnabled=%s\n"
                         " * classifierType=%s\n"
                         ) % (NUM_RECORDS,
                              signalAmplitude,
                              signalMean,
                              signalPeriod,
                              noiseAmplitude,
                              sensorType.split(".")[1],
                              spEnabled,
                              tmEnabled,
                              upEnabled,
                              classifierType.split(".")[1])
            print expParams

            inputFile = generateSensorData(DATA_DIR,
                                           OUTFILE_NAME,
                                           signalMean,
                                           signalPeriod,
                                           SEQUENCE_LENGTH,
                                           NUM_RECORDS,
                                           signalAmplitude,
                                           NUM_CATEGORIES,
                                           noiseAmplitude)

            dataSource = FileRecordStream(streamID=inputFile)
            network = configureNetwork(dataSource, networkConfig)

            partitions = generateNetworkPartitions(networkConfig,
                                                   NUM_RECORDS)

            trainNetwork(network, networkConfig, partitions, NUM_RECORDS)

def _generateScalar(filename="simple.csv", numSequences=2, elementsPerSeq=1,
                    numRepeats=10, stepSize=0.1, includeRandom=False):
  """ Generate a simple dataset. This contains a bunch of non-overlapping
  sequences of scalar values.

  Parameters:
  ----------------------------------------------------
  filename:       name of the file to produce, including extension. It will
                  be created in a 'datasets' sub-directory within the
                  directory containing this script.
  numSequences:   how many sequences to generate
  elementsPerSeq: length of each sequence
  numRepeats:     how many times to repeat each sequence in the output
  stepSize:       how far apart each scalar is
  includeRandom:  if true, include another random field
  """

  # Create the output file
  scriptDir = os.path.dirname(__file__)
  pathname = os.path.join(scriptDir, 'datasets', filename)
  print "Creating %s..." % (pathname)
  fields = [('classification', 'float', ''),
            ('field1', 'float', '')]
  if includeRandom:
    fields += [('randomData', 'float', '')]
  outFile = FileRecordStream(pathname, write=True, fields=fields)

  # Create the sequences
  sequences = []
  for i in range(numSequences):
    seq = [x for x in range(i * elementsPerSeq, (i + 1) * elementsPerSeq)]
    sequences.append(seq)

  random.seed(42)

  # Write out the sequences in random order
  seqIdxs = []
  for i in range(numRepeats):
    seqIdxs += range(numSequences)
  random.shuffle(seqIdxs)

  for seqIdx in seqIdxs:
    seq = sequences[seqIdx]
    for x in seq:
      if includeRandom:
        outFile.appendRecord([seqIdx, x * stepSize, random.random()])
      else:
        outFile.appendRecord([seqIdx, x * stepSize])

  outFile.close()

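# Hedged example invocation of _generateScalar() above.  The file name is
# arbitrary, and the 'datasets' sub-directory is assumed to already exist
# next to the calling script.
#
#   _generateScalar(filename='simple_3seq.csv', numSequences=3,
#                   elementsPerSeq=5, numRepeats=10, stepSize=0.1,
#                   includeRandom=True)
#
# produces 3 * 5 * 10 = 150 rows, each holding the sequence index, the scaled
# element value, and a uniform random value.
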
def _openStream(self, dataUrl, isBlocking, maxTimeout, bookmark,
                firstRecordIdx):
  """Open the underlying file stream.

  This only supports 'file://' prefixed paths.
  """
  self._recordStoreName = findDataset(dataUrl[len(FILE_PREF):])
  self._recordStore = FileRecordStream(streamID=self._recordStoreName,
                                       write=False,
                                       bookmark=bookmark,
                                       firstRecord=firstRecordIdx)

def runDemo():
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = dataSource.getDataRowCount()
  print "Creating network"
  network = createNetwork(dataSource)
  outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_FILE_NAME)
  with open(outputPath, "w") as outputFile:
    writer = csv.writer(outputFile)
    print "Running network"
    print "Writing output to: %s" % outputPath
    runNetwork(network, numRecords, writer)
  print "Hierarchy demo finished"

def test_GymAggregateWithOldData(self):
  filename = resource_filename(
    "nupic.datafiles",
    "extra/gym/gym.csv"
  )

  input = []
  gymFields = None

  with FileRecordStream(filename) as f:
    gymFields = f.getFields()
    for i in range(10):
      input.append(f.getNextRecord())

  # Append some records from the beginning of the dataset to the end, so the
  # aggregator sees old (out-of-order) timestamps
  input.extend(input[0:3])

  for h in (1, 3):
    aggregationOptions = dict(
      fields=[
        ('timestamp', lambda x: x[0]),
        ('attendeeCount', sum),
        ('consumption', sum)],
      hours=h
    )

    handle = tempfile.NamedTemporaryFile(prefix='test', suffix='.bin')
    outputFile = handle.name
    handle.close()

    dataInput = DataInputList(input, gymFields)
    dataOutput = DataOutputList(None)

    _aggregate(input=dataInput, options=aggregationOptions,
               timeFieldName='timestamp', output=dataOutput)

    dataOutput.close()

    outputRecords = dataOutput._store

    timeFieldIdx = [f[0] for f in gymFields].index('timestamp')

    diffs = []
    for i in range(1, len(outputRecords)):
      diffs.append(outputRecords[i][timeFieldIdx] -
                   outputRecords[i - 1][timeFieldIdx])
    positiveTimeFlow = list(map((lambda x: x < datetime.timedelta(seconds=0)),
                                diffs))
    # Make sure that the old records show up in the aggregated output and, at
    # the same time, that they are in consecutive order after being inserted
    self.assertEqual(sum(positiveTimeFlow), 1)

  return

def testFileRecordStreamReadData(self):
  ndg = NetworkDataGenerator()
  filename = os.path.join(self.dirName, "test_data/multi_sample.csv")
  ndg.split(filename, 3, False)

  dataOutputFile = os.path.join(
    self.dirName, "test_data/multi_sample_split.csv")
  categoriesOutputFile = os.path.join(
    self.dirName, "test_data/multi_sample_categories.json")
  ndg.saveData(dataOutputFile, categoriesOutputFile)

  # If no error is raised, then the data is in the correct format
  frs = FileRecordStream(dataOutputFile)

def run(numRecords):
  '''
  Run the sine wave example.
  '''
  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set the predicted field.
  network.regions["sensor"].getSelf().predictedField = "sine"
  network.regions["sensor"].setParameter("predictedField", "sine")

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = 1  # Run the network, N iterations at a time.
  output = nupic_output.NuPICPlotOutput("Sine", show_anomaly_score=True)
  for iteration in range(0, numRecords, N):
    network.run(N)

    sine = network.regions["sensor"].getOutputData("sourceOut")[0]

    predictionResults = getPredictionResults(network, "classifier")
    oneStep = predictionResults[1]["predictedValue"]
    oneStepConfidence = predictionResults[1]["predictionConfidence"]
    tenStep = predictionResults[10]["predictedValue"]
    tenStepConfidence = predictionResults[10]["predictionConfidence"]

    result = (oneStep, oneStepConfidence * 100,
              tenStep, tenStepConfidence * 100)
    print "1-step: {:16} ({:4.4}%)\t 10-step: {:16} ({:4.4}%)".format(*result)
    results.append(result)
    output.write(sine, oneStep, 0)

  output.close()

  return results

def _openStream(self, dataUrl, isBlocking, maxTimeout, bookmark,
                firstRecordIdx):
  """Open the underlying file stream.

  This only supports 'file://' prefixed paths.
  """
  filePath = dataUrl[len(FILE_PREF):]
  if not os.path.isabs(filePath):
    filePath = os.path.join(os.getcwd(), filePath)
  self._recordStoreName = filePath
  self._recordStore = FileRecordStream(streamID=self._recordStoreName,
                                       write=False,
                                       bookmark=bookmark,
                                       firstRecord=firstRecordIdx)

def _generateScalar(filename="simple.csv", numSequences=2, elementsPerSeq=1,
                    numRepeats=10, stepSize=0.1, resets=False):
  """ Generate a simple dataset. This contains a bunch of non-overlapping
  sequences of scalar values.

  Parameters:
  ----------------------------------------------------
  filename:       name of the file to produce, including extension. It will
                  be created in a 'datasets' sub-directory within the
                  directory containing this script.
  numSequences:   how many sequences to generate
  elementsPerSeq: length of each sequence
  numRepeats:     how many times to repeat each sequence in the output
  stepSize:       how far apart each scalar is
  resets:         if True, turn on reset at start of each sequence
  """

  # Create the output file
  scriptDir = os.path.dirname(__file__)
  pathname = os.path.join(scriptDir, 'datasets', filename)
  print("Creating %s..." % (pathname))
  fields = [('reset', 'int', 'R'),
            ('category', 'int', 'C'),
            ('field1', 'float', '')]
  outFile = FileRecordStream(pathname, write=True, fields=fields)

  # Create the sequences
  sequences = []
  for i in range(numSequences):
    seq = [x for x in range(i * elementsPerSeq, (i + 1) * elementsPerSeq)]
    sequences.append(seq)

  # Write out the sequences in random order
  seqIdxs = []
  for i in range(numRepeats):
    seqIdxs += list(range(numSequences))
  random.shuffle(seqIdxs)

  for seqIdx in seqIdxs:
    reset = int(resets)
    seq = sequences[seqIdx]
    for x in seq:
      outFile.appendRecord([reset, str(seqIdx), x * stepSize])
      reset = 0

  outFile.close()

def writeTestFile(testFile, fields, big):
  if big:
    print 'Creating big test file (763MB)...'
    payload = 'x' * 10 ** 8
  else:
    print 'Creating small test file...'
    payload = 'x' * 3
  with FileRecordStream(testFile, write=True, fields=fields) as o:
    print '.'; o.appendRecord([1, 3, 6, payload])
    print '.'; o.appendRecord([2, 3, 6, payload])
    print '.'; o.appendRecord([1, 4, 6, payload])
    print '.'; o.appendRecord([2, 4, 6, payload])
    print '.'; o.appendRecord([1, 3, 5, payload])
    print '.'; o.appendRecord([2, 3, 5, payload])
    print '.'; o.appendRecord([1, 4, 5, payload])
    print '.'; o.appendRecord([2, 4, 5, payload])

def testSaveAndReload(self):
  """
  This function tests saving and loading. It will train a network for 500
  iterations, then save it and reload it as a second network instance. It
  will then run both networks for 100 iterations and ensure they return
  identical results.
  """

  print "Creating network..."

  netOPF = _createOPFNetwork()
  level1OPF = netOPF.regions['level1SP']

  # ==========================================================================
  print "Training network for 500 iterations"
  level1OPF.setParameter('learningMode', 1)
  level1OPF.setParameter('inferenceMode', 0)
  netOPF.run(500)
  level1OPF.setParameter('learningMode', 0)
  level1OPF.setParameter('inferenceMode', 1)

  # ==========================================================================
  # Save network and reload as a second instance. We need to reset the data
  # source for the unsaved network so that both instances start at the same
  # place
  print "Saving and reload network"
  _, tmpNetworkFilename = _setupTempDirectory("trained.nta")
  netOPF.save(tmpNetworkFilename)
  netOPF2 = Network(tmpNetworkFilename)
  level1OPF2 = netOPF2.regions['level1SP']

  sensor = netOPF.regions['sensor'].getSelf()
  trainFile = resource_filename("nupic.datafiles", "extra/gym/gym.csv")
  sensor.dataSource = FileRecordStream(streamID=trainFile)
  sensor.dataSource.setAutoRewind(True)

  # ==========================================================================
  print "Running inference on the two networks for 100 iterations"
  for _ in xrange(100):
    netOPF2.run(1)
    netOPF.run(1)

    l1outputOPF2 = level1OPF2.getOutputData("bottomUpOut")
    l1outputOPF = level1OPF.getOutputData("bottomUpOut")
    opfHash2 = l1outputOPF2.nonzero()[0].sum()
    opfHash = l1outputOPF.nonzero()[0].sum()

    self.assertEqual(opfHash2, opfHash)