def getDescription(datasets):
    # ========================================================================
    # Encoder for the sensor
    encoder = MultiEncoder()

    if config["encodingFieldStyleA"] == "contiguous":
        encoder.addEncoder(
            "fieldA",
            ScalarEncoder(
                w=config["encodingOnBitsA"],
                n=config["encodingFieldWidthA"],
                minval=0,
                maxval=config["numAValues"],
                periodic=True,
                name="fieldA",
            ),
        )
    elif config["encodingFieldStyleA"] == "sdr":
        encoder.addEncoder(
            "fieldA",
            SDRCategoryEncoder(
                w=config["encodingOnBitsA"],
                n=config["encodingFieldWidthA"],
                categoryList=range(config["numAValues"]),
                name="fieldA",
            ),
        )
    else:
        assert False

    if config["encodingFieldStyleB"] == "contiguous":
        encoder.addEncoder(
            "fieldB",
            ScalarEncoder(
                w=config["encodingOnBitsB"],
                n=config["encodingFieldWidthB"],
                minval=0,
                maxval=config["numBValues"],
                periodic=True,
                name="fieldB",
            ),
        )
    elif config["encodingFieldStyleB"] == "sdr":
        encoder.addEncoder(
            "fieldB",
            SDRCategoryEncoder(
                w=config["encodingOnBitsB"],
                n=config["encodingFieldWidthB"],
                categoryList=range(config["numBValues"]),
                name="fieldB",
            ),
        )
    else:
        assert False

    # ========================================================================
    # Network definition

    # ------------------------------------------------------------------
    # Node params
    # The inputs are long, horizontal vectors
    inputShape = (1, encoder.getWidth())

    # Layout the coincidences vertically stacked on top of each other, each
    # looking at the entire input field.
    coincidencesShape = (config["spCoincCount"], 1)

    inputBorder = inputShape[1] / 2
    if inputBorder * 2 >= inputShape[1]:
        inputBorder -= 1

    sensorParams = dict(
        # encoder/datasource are not parameters so don't include here
        verbosity=config["sensorVerbosity"]
    )

    CLAParams = dict(
        inputShape=inputShape,
        inputBorder=inputBorder,
        coincidencesShape=coincidencesShape,
        coincInputRadius=inputShape[1] / 2,
        coincInputPoolPct=1.0,
        gaussianDist=0,
        commonDistributions=0,  # should be False if possibly not training
        localAreaDensity=-1,  # 0.05,
        numActivePerInhArea=config["spNumActivePerInhArea"],
        dutyCyclePeriod=1000,
        stimulusThreshold=1,
        synPermInactiveDec=config["spSynPermInactiveDec"],
        synPermActiveInc=0.02,
        synPermActiveSharedDec=0.0,
        synPermOrphanDec=0.0,
        minPctDutyCycleBeforeInh=0.001,
        minPctDutyCycleAfterInh=config["spMinPctDutyCycleAfterInh"],
        minDistance=0.05,
        computeTopDown=1,
        spVerbosity=config["spVerbosity"],
        spSeed=1,
        printPeriodicStats=int(config["spPeriodicStats"]),
        # TP params
        disableTemporal=1,
        # General params
        trainingStep="spatial",
    )

    trainingDataSource = FileRecordStream(datasets["trainingFilename"])

    description = dict(
        options=dict(logOutputsDuringInference=False),
        network=dict(
            sensorDataSource=trainingDataSource,
            sensorEncoder=encoder,
            sensorParams=sensorParams,
            CLAType="py.CLARegion",
            CLAParams=CLAParams,
            classifierType=None,
            classifierParams=None,
        ),
    )

    if config["trainSP"]:
        description["spTrain"] = (
            dict(
                iterationCount=config["iterationCount"],
                # iter=displaySPCoincidences(50),
                finish=printSPCoincidences(),
            ),
        )
    else:
        description["spTrain"] = dict(
            # need to train with one iteration just to initialize data structures
            iterationCount=1
        )

    # ========================================================================
    # Inference tests
    inferSteps = []

    # ----------------------------------------
    # Training dataset
    if True:
        datasetName = "bothTraining"
        inferSteps.append(
            dict(
                name="%s_baseline" % datasetName,
                iterationCount=config["iterationCount"],
                setup=[sensorOpen(datasets["trainingFilename"])],
                ppOptions=dict(printLearnedCoincidences=True),
            )
        )

        inferSteps.append(
            dict(
                name="%s_acc" % datasetName,
                iterationCount=config["iterationCount"],
                setup=[sensorOpen(datasets["trainingFilename"])],
                ppOptions=dict(
                    onlyClassificationAcc=True,
                    tpActivationThresholds=config["tpActivationThresholds"],
                    computeDistances=True,
                    verbosity=1,
                ),
            )
        )

    # ----------------------------------------
    # Testing dataset
    if "testingFilename" in datasets:
        datasetName = "bothTesting"
        inferSteps.append(
            dict(
                name="%s_baseline" % datasetName,
                iterationCount=config["iterationCount"],
                setup=[sensorOpen(datasets["testingFilename"])],
                ppOptions=dict(printLearnedCoincidences=False),
            )
        )

        inferSteps.append(
            dict(
                name="%s_acc" % datasetName,
                iterationCount=config["iterationCount"],
                setup=[sensorOpen(datasets["testingFilename"])],
                ppOptions=dict(
                    onlyClassificationAcc=True,
                    tpActivationThresholds=config["tpActivationThresholds"],
                ),
            )
        )

    description["infer"] = inferSteps

    return description
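

# ----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original experiment): build the
# description above and list its inference steps.  The CSV paths below are
# hypothetical, and the module-level `config` dict is assumed to be defined
# elsewhere in this description.py.
if __name__ == "__main__":
    _exampleDatasets = {
        "trainingFilename": "data/train.csv",  # hypothetical path
        "testingFilename": "data/test.csv",    # hypothetical path
    }
    _exampleDescription = getDescription(_exampleDatasets)
    for _step in _exampleDescription["infer"]:
        print _step["name"], _step["iterationCount"]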


def getDescriptionImpl(datasets, config):
  """ Implementation for description.py getDescription() entry point function.
  Builds an experiment description dictionary as required by LPF (Lightweight
  Prediction Framework).

  Hardcoded data that is less likely to vary between experiments is augmented
  with data from the config dictionary.

  See getBaseDatasets() and getDatasets().

  datasets:  a dictionary of input datasets that may have been pre-processed
             via aggregation. Keys:
             'trainDataset'          -- path to the training dataset
             'inferDataset.N.alias'  -- path(s) to the inference dataset

  config:    configuration dictionary from description.py

  returns:   an experiment description dictionary as required by LPF
  """

  # ----------------------------------------------------------------------------
  # Encoder for the sensor
  encoder = MultiEncoder(_getDatasetEncoderConfig(config))

  # ------------------------------------------------------------------
  # Region params
  CLAParams = _getCLAParams(encoder=encoder, config=config)

  sensorParams = dict(
    # encoder/datasource are not parameters so don't include here
    verbosity=config['sensorVerbosity']
  )

  # Filesource for the sensor. Set the filename in setup functions.
  dataSource = FileRecordStream('foo')

  description = dict(
    options = dict(
      logOutputsDuringInference = False,
    ),

    network = dict(
      # Think of the sensor as a shell holding the dataSource and encoder;
      # the encoder has pre-encoder and post-encoder filters; filters appear
      # in a different place (TODO: where?)
      sensorDataSource = dataSource,
      sensorEncoder = encoder,

      # LPF converts this to JSON strings; used as constructor args; has simple
      # types (ints, strings, floats)
      sensorParams = sensorParams,

      # CLA class; the "py." prefix is for class names implemented in Python;
      # older code implemented regions in C++ and designated the class name
      # without a prefix.
      CLAType = 'py.CLARegion',
      # dict converted to JSON string
      CLAParams = CLAParams,

      # classifiers are presently not used (formerly used by vision code);
      # should be okay to leave out Classifier, sensor, CLA
      classifierType = None,
      classifierParams = None),
    )

  # ----------------------------------------------------------------------------
  # Configure Training and Inference phases
  # ----------------------------------------------------------------------------
  #
  # A phase is 0 or more steps (a list of dictionaries; each dict corresponds
  # to one step).  See the py/nupic/frameworks/prediction/experiment.py
  # docstring.
  #
  # step = dict(name, setup, iter, finish, iterationCount)
  #   setup, iter, finish are callbacks:
  #
  #   name:   step name string; optional, used for printing messages to console
  #   setup:  open the input file (e.g., via dataSource), print stats, etc.
  #   iter:   for diagnostics/debugging; called by net.run between iterations
  #   finish: called at the end by net.run; usually prints out stats (e.g., how
  #           many synapses, time taken, etc.)
  #
  # Callbacks are almost always reused, so they are not experiment-specific
  # (see imports at top of file).  A callback always has the form
  # c(experiment_obj, iter_number) and can get at
  # experiment.network.regions["sensor"].getSelf()

  spEnable = config['spEnable']
  spTrain = _isSPTrainingEnabled(config)

  tpEnable = config['tpEnable']
  tpTrain = _isTPTrainingEnabled(config)

  # NOTE: presently, we always train TP (during training phase) if TP is enabled
  assert(tpTrain == tpEnable)

  # At least one of SP/TP must be enabled for a meaningful system
  assert(spEnable or tpEnable)

  # NOTE: SP and spatial regression need to undergo training over the same set
  #  of rows.  Since we're not reading the training dataset here to find out
  #  the number of rows, we presently configure both with the same auto-rewind
  #  setting.
  # TODO: this may cause knn training to repeatedly iterate unnecessarily over
  #  the same records in case spTrainIterationCount is larger than the number
  #  of rows in the training dataset.  Look into optimizing this to avoid
  #  wasting time on knn training due to unnecessary iterations, but make sure
  #  that both SP and knn train on the exact same rows.
  spTrainMayNeedAutoRewind = True \
    if config['spTrainIterationCount'] is not None \
    else False

  # ----------------------------------------------------------------------------
  # SP training
  if spTrain:
    description['spTrain'] = []
    for i in xrange(config['spTrainNPasses']):
      stepDict = dict(
        name='sp.train.pass_%d' % (i),
        iterationCount=config['spTrainIterationCount'],
        setup=[sensorOpen(datasets[_getTrainingDatasetKey(config)]) if i==0 \
                 else sensorRewind,
               fileSourceAutoRewind(spTrainMayNeedAutoRewind),],
        finish=[fileSourceAutoRewind(False),],
      )
      description['spTrain'].append(stepDict)

  elif spEnable:
    description['spTrain'] = dict(
      # need to train with one iteration just to initialize data structures
      # TODO: seems like a hack; shouldn't the CLA framework automatically
      #  initialize the necessary subsystems? (ask Ron)
      iterationCount=1,
    )

  # ----------------------------------------------------------------------------
  # TP training
  if tpTrain:
    description['tpTrain'] = []
    mayNeedAutoRewind = True if config['tpTrainIterationCount'] is not None \
                        else False
    for i in xrange(config['tpTrainNPasses']):
      stepDict = dict(
        name='tp.train.pass_%d' % (i),
        iterationCount=config['tpTrainIterationCount'],
        setup=[
          sensorOpen(datasets[_getTrainingDatasetKey(config)]) if i==0 \
            else sensorRewind,
          fileSourceAutoRewind(mayNeedAutoRewind),
        ],
        finish=[fileSourceAutoRewind(False),],
      )
      if config['tpTrainPrintStatsPeriodIter'] > 0:
        stepDict['iter'] = printTPTiming(config['tpTrainPrintStatsPeriodIter'])
        stepDict['finish'] += [printTPTiming()] #, printTPCells]

      description['tpTrain'].append(stepDict)

  # ----------------------------------------------------------------------------
  # Inference tests
  # NOTE: Presently, SP and TP learning is disabled during inference

  description['infer'] = []

  predictionFields = None
  spatialRegrTests = None
  if 'spFieldPredictionSchema' in config and \
      config['spFieldPredictionSchema'] != None:
    if len(config['spFieldPredictionSchema']['predictionFields']) > 0:
      spFieldPredictionSchema = config['spFieldPredictionSchema']
      predictionFields = spFieldPredictionSchema['predictionFields']
      if len(spFieldPredictionSchema['regressionTests']) > 0:
        # presently, our spatial regression modules (knn and linear) don't
        # support multiple fields
        assert(len(predictionFields) == 1)
        spatialRegrTests = spFieldPredictionSchema['regressionTests']

  # Set up test steps for all inference datasets
  for i, ds in enumerate(config['inferDatasets']):
    datasetInfo = config['inferDatasets'][i]

    # NOTE: the path/contents may differ from the corresponding dataset
    #  referenced in config['inferDatasets'] due to preprocessing (e.g.,
    #  aggregation)
    inferenceDatasetKey = \
      _datasetKeyFromInferenceDatasetIndex(index=i, config=config)
    inferenceDatasetPath = datasets[inferenceDatasetKey]

    # ----------------------------------------
    # Step: Temporal inference
    #
    if tpEnable:
      # Turn off plot histograms when running under darwin
      plotTemporalHistograms = True
      if sys.platform.startswith('darwin'):
        plotTemporalHistograms = False
        print "Turning off plotTemporalHistograms under darwin"

      temporalTestingStep = dict(
        name = getTemporalInferenceStepName(datasetInfo['alias'], i),
        iterationCount = ds['iterCount'],
        setup = [sensorOpen(inferenceDatasetPath)],
        ppOptions = dict(verbosity=config['postprocVerbosity'],
                         plotTemporalHistograms=plotTemporalHistograms,
                         printLearnedCoincidences=False,
                         logPredictions=True,)
      )
      description['infer'].append(temporalTestingStep)
    else:
      print 'temporalTestingStep skipped.'

    # ----------------------------------------
    # Step: Non-temporal regression algorithm training (if enabled)
    #
    if spatialRegrTests:
      # NOTE: we don't need auto-rewind when training spatial regression
      #  algorithms
      regrTrainStep = dict(
        name = ('%s_nontemporal.training') % \
               (_normalizeDatasetAliasNameForStepName(datasetInfo['alias']),),
        iterationCount=config['spTrainIterationCount'],
        setup=[sensorOpen(datasets[_getTrainingDatasetKey(config)]),
               fileSourceAutoRewind(spTrainMayNeedAutoRewind),],
        ppOptions = dict(verbosity=config['postprocVerbosity'],
                         printLearnedCoincidences=False,)
      )
      # Add spatial regression algorithm training requests
      ppOptions = regrTrainStep['ppOptions']
      for test in spatialRegrTests:
        assert(len(predictionFields) == 1)
        ppOptions[test['algorithm']] = 'train,%s' % (predictionFields[0])

      description['infer'].append(regrTrainStep)

    # ----------------------------------------
    # Step: Non-temporal inference
    #
    nontemporalTestingStep = dict(
      name = getNonTemporalInferenceStepName(datasetInfo['alias'], i),
      iterationCount = ds['iterCount'],
      setup = [
        sensorOpen(inferenceDatasetPath),
        fileSourceAutoRewind(False),
        # TODO Do we need to turn off collectStats in the 'finish' sub-step?
        setTPAttribute('collectStats', 1),
      ],
      # TODO which ppOptions do we want in this template?
      ppOptions = dict(
        verbosity=config['postprocVerbosity'],
        plotTemporalHistograms=False,
        printLearnedCoincidences=False,
        logPredictions=True,
      ),
    )

    # Add spatial field prediction options to the inference step
    if predictionFields:
      # Set sparse encodings of prediction fields to zero
      setup = nontemporalTestingStep['setup']
      setup.append(
        setAttribute('sensor', 'postEncodingFilters',
                     [ModifyFields(fields=predictionFields,
                                   operation='setToZero')])
      )

      if spatialRegrTests:
        # Add regression test requests
        ppOptions = nontemporalTestingStep['ppOptions']
        for test in spatialRegrTests:
          assert(len(predictionFields) == 1)
          ppOptions[test['algorithm']] = 'test,%s' % (predictionFields[0])

    description['infer'].append(nontemporalTestingStep)

  # ----------------------------------------------------------------------------
  # Add auto-reset intervals to the sensor region for tpTrain and infer phases
  # (if config['sensorAutoReset'] is enabled)
  # ----------------------------------------------------------------------------
  if 'sensorAutoReset' in config and config['sensorAutoReset'] is not None:
    dd = defaultdict(lambda: 0, config['sensorAutoReset'])
    # class timedelta([days[, seconds[, microseconds[, milliseconds[, minutes[,
    #                 hours[, weeks]]]]]]])
    if not (0 == dd['days'] == dd['hours'] == dd['minutes'] == dd['seconds'] \
            == dd['milliseconds'] == dd['microseconds'] == dd['weeks']):
      timeDelta = timedelta(days=dd['days'],
                            hours=dd['hours'],
                            minutes=dd['minutes'],
                            seconds=dd['seconds'],
                            milliseconds=dd['milliseconds'],
                            microseconds=dd['microseconds'],
                            weeks=dd['weeks'])

      tpTrainSteps = description['tpTrain'] if 'tpTrain' in description else []
      inferSteps = description['infer'] if 'infer' in description else []
      for step in itertools.chain(tpTrainSteps, inferSteps):
        if 'setup' not in step:
          step['setup'] = []
        step['setup'].append(setAutoResetInterval(timeDelta))

  return description
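

# ----------------------------------------------------------------------------
# Sketch only: per the docstring above, getDescriptionImpl() backs the
# getDescription() entry point of a description.py, which would simply
# delegate to it.  The `config` dict referenced here is the one defined in
# that description.py; it is assumed, not defined in this module.
#
#   def getDescription(datasets):
#     return getDescriptionImpl(datasets=datasets, config=config)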


def getDescription(datasets):
  # ========================================================================
  # Encoder for the sensor
  encoder = MultiEncoder()
  if config['encodingFieldStyleA'] == 'contiguous':
    encoder.addEncoder('fieldA',
                       ScalarEncoder(w=config['encodingOnBitsA'],
                                     n=config['encodingFieldWidthA'],
                                     minval=0, maxval=config['numAValues'],
                                     periodic=True, name='fieldA'))
  elif config['encodingFieldStyleA'] == 'sdr':
    encoder.addEncoder('fieldA',
                       SDRCategoryEncoder(w=config['encodingOnBitsA'],
                                          n=config['encodingFieldWidthA'],
                                          categoryList=range(config['numAValues']),
                                          name='fieldA'))
  else:
    assert False

  if config['encodingFieldStyleB'] == 'contiguous':
    encoder.addEncoder('fieldB',
                       ScalarEncoder(w=config['encodingOnBitsB'],
                                     n=config['encodingFieldWidthB'],
                                     minval=0, maxval=config['numBValues'],
                                     periodic=True, name='fieldB'))
  elif config['encodingFieldStyleB'] == 'sdr':
    encoder.addEncoder('fieldB',
                       SDRCategoryEncoder(w=config['encodingOnBitsB'],
                                          n=config['encodingFieldWidthB'],
                                          categoryList=range(config['numBValues']),
                                          name='fieldB'))
  else:
    assert False

  # ========================================================================
  # Network definition

  # ------------------------------------------------------------------
  # Node params
  # The inputs are long, horizontal vectors
  inputDimensions = (1, encoder.getWidth())

  # Layout the coincidences vertically stacked on top of each other, each
  # looking at the entire input field.
  columnDimensions = (config['spCoincCount'], 1)

  sensorParams = dict(
    # encoder/datasource are not parameters so don't include here
    verbosity=config['sensorVerbosity']
  )

  CLAParams = dict(
    inputDimensions = inputDimensions,
    columnDimensions = columnDimensions,
    potentialRadius = inputDimensions[1]/2,
    potentialPct = 1.0,
    gaussianDist = 0,
    commonDistributions = 0,    # should be False if possibly not training
    localAreaDensity = -1, #0.05,
    numActiveColumnsPerInhArea = config['spNumActivePerInhArea'],
    dutyCyclePeriod = 1000,
    stimulusThreshold = 1,
    synPermInactiveDec = config['spSynPermInactiveDec'],
    synPermActiveInc = 0.02,
    synPermActiveSharedDec = 0.0,
    synPermOrphanDec = 0.0,
    minPctDutyCycleBeforeInh = 0.001,
    minPctDutyCycleAfterInh = config['spMinPctDutyCycleAfterInh'],
    minDistance = 0.05,
    computeTopDown = 1,
    spVerbosity = config['spVerbosity'],
    spSeed = 1,
    printPeriodicStats = int(config['spPeriodicStats']),

    # TP params
    disableTemporal = 1,

    # General params
    trainingStep = 'spatial',
    )

  trainingDataSource = FileRecordStream(datasets['trainingFilename'])

  description = dict(
    options = dict(
      logOutputsDuringInference = False,
    ),

    network = dict(
      sensorDataSource = trainingDataSource,
      sensorEncoder = encoder,
      sensorParams = sensorParams,

      CLAType = 'py.CLARegion',
      CLAParams = CLAParams,

      classifierType = None,
      classifierParams = None),
    )

  if config['trainSP']:
    description['spTrain'] = dict(
      iterationCount=config['iterationCount'],
      #iter=displaySPCoincidences(50),
      finish=printSPCoincidences()
      ),
  else:
    description['spTrain'] = dict(
      # need to train with one iteration just to initialize data structures
      iterationCount=1)

  # ============================================================================
  # Inference tests
  inferSteps = []

  # ----------------------------------------
  # Training dataset
  if True:
    datasetName = 'bothTraining'
    inferSteps.append(
      dict(name = '{0!s}_baseline'.format(datasetName),
           iterationCount = config['iterationCount'],
           setup = [sensorOpen(datasets['trainingFilename'])],
           ppOptions = dict(printLearnedCoincidences=True),
          )
      )

    inferSteps.append(
      dict(name = '{0!s}_acc'.format(datasetName),
           iterationCount = config['iterationCount'],
           setup = [sensorOpen(datasets['trainingFilename'])],
           ppOptions = dict(onlyClassificationAcc=True,
                            tpActivationThresholds=config['tpActivationThresholds'],
                            computeDistances=True,
                            verbosity = 1),
          )
      )

  # ----------------------------------------
  # Testing dataset
  if 'testingFilename' in datasets:
    datasetName = 'bothTesting'
    inferSteps.append(
      dict(name = '{0!s}_baseline'.format(datasetName),
           iterationCount = config['iterationCount'],
           setup = [sensorOpen(datasets['testingFilename'])],
           ppOptions = dict(printLearnedCoincidences=False),
          )
      )

    inferSteps.append(
      dict(name = '{0!s}_acc'.format(datasetName),
           iterationCount = config['iterationCount'],
           setup = [sensorOpen(datasets['testingFilename'])],
           ppOptions = dict(onlyClassificationAcc=True,
                            tpActivationThresholds=config['tpActivationThresholds']),
          )
      )

  description['infer'] = inferSteps

  return description


def getDescription(datasets):
  # ========================================================================
  # Network definition

  # Encoder for the sensor
  encoder = MultiEncoder()
  if 'filenameCategory' in datasets:
    categories = [x.strip() for x in open(datasets['filenameCategory'])]
  else:
    categories = [chr(x + ord('a')) for x in range(26)]

  if config['overlappingPatterns']:
    encoder.addEncoder(
        "name",
        SDRCategoryEncoder(n=200,
                           w=config['spNumActivePerInhArea'],
                           categoryList=categories,
                           name="name"))
  else:
    encoder.addEncoder(
        "name",
        CategoryEncoder(w=config['spNumActivePerInhArea'],
                        categoryList=categories,
                        name="name"))

  # ------------------------------------------------------------------
  # Node params
  # The inputs are long, horizontal vectors
  inputDimensions = (1, encoder.getWidth())

  # Layout the coincidences vertically stacked on top of each other, each
  # looking at the entire input field.
  columnDimensions = (config['spCoincCount'], 1)

  # If we have disableSpatial, then set the number of "coincidences" to be the
  # same as the encoder width
  if config['disableSpatial']:
    columnDimensions = (encoder.getWidth(), 1)
    config['trainSP'] = 0

  sensorParams = dict(
      # encoder/datasource are not parameters so don't include here
      verbosity=config['sensorVerbosity'])

  CLAParams = dict(
      # SP params
      disableSpatial=config['disableSpatial'],
      inputDimensions=inputDimensions,
      columnDimensions=columnDimensions,
      potentialRadius=inputDimensions[1] / 2,
      potentialPct=1.00,
      gaussianDist=0,
      commonDistributions=0,  # should be False if possibly not training
      localAreaDensity=-1,  #0.05,
      numActiveColumnsPerInhArea=config['spNumActivePerInhArea'],
      dutyCyclePeriod=1000,
      stimulusThreshold=1,
      synPermInactiveDec=0.11,
      synPermActiveInc=0.11,
      synPermActiveSharedDec=0.0,
      synPermOrphanDec=0.0,
      minPctDutyCycleBeforeInh=0.001,
      minPctDutyCycleAfterInh=0.001,
      spVerbosity=config['spVerbosity'],
      spSeed=1,
      printPeriodicStats=int(config['spPrintPeriodicStats']),

      # TM params
      tpSeed=1,
      disableTemporal=0 if config['trainTP'] else 1,
      temporalImp=config['temporalImp'],
      nCellsPerCol=config['tpNCellsPerCol'] if config['trainTP'] else 1,
      collectStats=1,
      burnIn=2,
      verbosity=config['tpVerbosity'],
      newSynapseCount=config['spNumActivePerInhArea'],
      minThreshold=config['spNumActivePerInhArea'],
      activationThreshold=config['spNumActivePerInhArea'],
      initialPerm=config['tpInitialPerm'],
      connectedPerm=0.5,
      permanenceInc=config['tpPermanenceInc'],
      permanenceDec=config['tpPermanenceDec'],  # perhaps tune this
      globalDecay=config['tpGlobalDecay'],
      pamLength=config['tpPAMLength'],
      maxSeqLength=config['tpMaxSeqLength'],
      maxAge=config['tpMaxAge'],

      # General params
      computeTopDown=config['computeTopDown'],
      trainingStep='spatial',
  )

  dataSource = FileRecordStream(datasets['filenameTrain'])

  description = dict(
      options=dict(logOutputsDuringInference=False,),
      network=dict(sensorDataSource=dataSource,
                   sensorEncoder=encoder,
                   sensorParams=sensorParams,
                   CLAType='py.CLARegion',
                   CLAParams=CLAParams,
                   classifierType=None,
                   classifierParams=None),
  )

  if config['trainSP']:
    description['spTrain'] = dict(
        iterationCount=config['iterationCountTrain'],
        #iter=displaySPCoincidences(50),
        #finish=printSPCoincidences()
    ),
  else:
    description['spTrain'] = dict(
        # need to train with one iteration just to initialize data structures
        iterationCount=1)

  if config['trainTP']:
    description['tpTrain'] = []
    for i in range(config['trainTPRepeats']):
      stepDict = dict(
          name='step_%d' % (i),
          setup=sensorRewind,
          iterationCount=config['iterationCountTrain'],
      )
      if config['tpTimingEvery'] > 0:
        stepDict['iter'] = printTPTiming(config['tpTimingEvery'])
        stepDict['finish'] = [printTPTiming(), printTPCells]

      description['tpTrain'].append(stepDict)

  # ----------------------------------------------------------------------------
  # Inference tests
  inferSteps = []

  if config['evalTrainingSetNumIterations'] > 0:
    # The training set. Used to train the n-grams.
    inferSteps.append(
        dict(
            name='confidenceTrain_baseline',
            iterationCount=min(config['evalTrainingSetNumIterations'],
                               config['iterationCountTrain']),
            ppOptions=dict(
                verbosity=config['ppVerbosity'],
                printLearnedCoincidences=True,
                nGrams='train',
                #ipsDetailsFor = "name,None,2",
            ),
            #finish=printTPCells,
        ))

    # Testing the training set on both the TM and n-grams.
    inferSteps.append(
        dict(
            name='confidenceTrain_nonoise',
            iterationCount=min(config['evalTrainingSetNumIterations'],
                               config['iterationCountTrain']),
            setup=[sensorOpen(datasets['filenameTrain'])],
            ppOptions=dict(
                verbosity=config['ppVerbosity'],
                printLearnedCoincidences=False,
                nGrams='test',
                burnIns=[1, 2, 3, 4],
                #ipsDetailsFor = "name,None,2",
                #ipsAt = [1,2,3,4],
            ),
        ))

  # The test set
  if True:
    if datasets['filenameTest'] != datasets['filenameTrain']:
      inferSteps.append(
          dict(
              name='confidenceTest_baseline',
              iterationCount=config['iterationCountTest'],
              setup=[sensorOpen(datasets['filenameTest'])],
              ppOptions=dict(
                  verbosity=config['ppVerbosity'],
                  printLearnedCoincidences=False,
                  nGrams='test',
                  burnIns=[1, 2, 3, 4],
                  #ipsAt = [1,2,3,4],
                  ipsDetailsFor="name,None,2",
              ),
          ))

  description['infer'] = inferSteps

  return description
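

# ----------------------------------------------------------------------------
# Illustrative only: a hypothetical config for the experiment above.  The key
# names are the ones getDescription() reads; every value is merely an example,
# not a recommended setting.  Kept under a separate name so it does not clash
# with the real module-level `config`.
_exampleConfig = dict(
    sensorVerbosity=0,
    overlappingPatterns=0,
    disableSpatial=0,
    trainSP=1,
    spCoincCount=200,
    spNumActivePerInhArea=5,
    spVerbosity=0,
    spPrintPeriodicStats=0,
    trainTP=1,
    trainTPRepeats=3,
    temporalImp='py',
    tpNCellsPerCol=10,
    tpVerbosity=0,
    tpInitialPerm=0.11,
    tpPermanenceInc=0.1,
    tpPermanenceDec=0.1,
    tpGlobalDecay=0.0,
    tpPAMLength=1,
    tpMaxSeqLength=0,
    tpMaxAge=0,
    tpTimingEvery=0,
    computeTopDown=0,
    iterationCountTrain=1000,
    iterationCountTest=100,
    evalTrainingSetNumIterations=100,
    ppVerbosity=0,
)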


def getDescription(datasets):
  # ========================================================================
  # Encoder for the sensor
  encoder = MultiEncoder()
  if config['encodingFieldStyleA'] == 'contiguous':
    encoder.addEncoder('fieldA',
                       ScalarEncoder(w=config['encodingOnBitsA'],
                                     n=config['encodingFieldWidthA'],
                                     minval=0, maxval=config['numAValues'],
                                     periodic=True, name='fieldA'))
  elif config['encodingFieldStyleA'] == 'sdr':
    encoder.addEncoder('fieldA',
                       SDRCategoryEncoder(w=config['encodingOnBitsA'],
                                          n=config['encodingFieldWidthA'],
                                          categoryList=range(config['numAValues']),
                                          name='fieldA'))
  else:
    assert False

  if config['encodingFieldStyleB'] == 'contiguous':
    encoder.addEncoder('fieldB',
                       ScalarEncoder(w=config['encodingOnBitsB'],
                                     n=config['encodingFieldWidthB'],
                                     minval=0, maxval=config['numBValues'],
                                     periodic=True, name='fieldB'))
  elif config['encodingFieldStyleB'] == 'zero':
    encoder.addEncoder('fieldB',
                       SDRRandomEncoder(w=0, n=config['encodingFieldWidthB'],
                                        name='fieldB'))
  elif config['encodingFieldStyleB'] == 'sdr':
    encoder.addEncoder('fieldB',
                       SDRCategoryEncoder(w=config['encodingOnBitsB'],
                                          n=config['encodingFieldWidthB'],
                                          categoryList=range(config['numBValues']),
                                          name='fieldB'))
  else:
    assert False

  # ========================================================================
  # Network definition

  # ------------------------------------------------------------------
  # Node params
  # The inputs are long, horizontal vectors
  inputShape = (1, encoder.getWidth())

  # Layout the coincidences vertically stacked on top of each other, each
  # looking at the entire input field.
  coincidencesShape = (config['spCoincCount'], 1)

  inputBorder = inputShape[1]/2
  if inputBorder*2 >= inputShape[1]:
    inputBorder -= 1

  sensorParams = dict(
    # encoder/datasource are not parameters so don't include here
    verbosity=config['sensorVerbosity']
  )

  CLAParams = dict(
    inputShape = inputShape,
    inputBorder = inputBorder,
    coincidencesShape = coincidencesShape,
    coincInputRadius = inputShape[1]/2,
    coincInputPoolPct = 1.0,
    gaussianDist = 0,
    commonDistributions = 0,    # should be False if possibly not training
    localAreaDensity = -1, #0.05,
    numActivePerInhArea = config['spNumActivePerInhArea'],
    dutyCyclePeriod = 1000,
    stimulusThreshold = 1,
    synPermInactiveDec = config['spSynPermInactiveDec'],
    synPermActiveInc = 0.02,
    synPermActiveSharedDec = 0.0,
    synPermOrphanDec = 0.0,
    minPctDutyCycleBeforeInh = 0.001,
    minPctDutyCycleAfterInh = config['spMinPctDutyCycleAfterInh'],
    minDistance = 0.05,
    computeTopDown = 1,
    spVerbosity = config['spVerbosity'],
    spSeed = 1,
    printPeriodicStats = int(config['spPeriodicStats']),

    # TP params
    disableTemporal = 1,

    # General params
    trainingStep = 'spatial',
    )

  trainingDataSource = FileRecordStream(datasets['trainingFilename'])

  description = dict(
    options = dict(
      logOutputsDuringInference = False,
    ),

    network = dict(
      sensorDataSource = trainingDataSource,
      sensorEncoder = encoder,
      sensorParams = sensorParams,

      CLAType = 'py.CLARegion',
      CLAParams = CLAParams,

      classifierType = None,
      classifierParams = None),
    )

  if config['trainSP']:
    description['spTrain'] = dict(
      iterationCount=config['iterationCount'],
      #iter=displaySPCoincidences(50),
      finish=printSPCoincidences()
      ),
  else:
    description['spTrain'] = dict(
      # need to train with one iteration just to initialize data structures
      iterationCount=1)

  # ============================================================================
  # Inference tests
  inferSteps = []

  # ----------------------------------------
  # Training dataset
  if True:
    datasetName = 'bothTraining'
    inferSteps.append(
      dict(name = '%s_baseline' % datasetName,
           iterationCount = config['iterationCount'],
           setup = [sensorOpen(datasets['trainingFilename'])],
           ppOptions = dict(printLearnedCoincidences=True),
          )
      )

    inferSteps.append(
      dict(name = '%s_acc' % datasetName,
           iterationCount = config['iterationCount'],
           setup = [sensorOpen(datasets['trainingFilename'])],
           ppOptions = dict(onlyClassificationAcc=True,
                            tpActivationThresholds=config['tpActivationThresholds'],
                            computeDistances=True,
                            verbosity = 1),
          )
      )

  # ----------------------------------------
  # Testing dataset
  if 'testingFilename' in datasets:
    datasetName = 'bothTesting'
    inferSteps.append(
      dict(name = '%s_baseline' % datasetName,
           iterationCount = config['iterationCount'],
           setup = [sensorOpen(datasets['testingFilename'])],
           ppOptions = dict(printLearnedCoincidences=False),
          )
      )

    inferSteps.append(
      dict(name = '%s_acc' % datasetName,
           iterationCount = config['iterationCount'],
           setup = [sensorOpen(datasets['testingFilename'])],
           ppOptions = dict(onlyClassificationAcc=True,
                            tpActivationThresholds=config['tpActivationThresholds']),
          )
      )

  description['infer'] = inferSteps

  return description


def getDescription(datasets):
  # ========================================================================
  # Network definition

  # Encoder for the sensor
  encoder = MultiEncoder()
  if 'filenameCategory' in datasets:
    categories = [x.strip() for x in
                  open(datasets['filenameCategory']).xreadlines()]
  else:
    categories = [chr(x+ord('a')) for x in range(26)]

  if config['overlappingPatterns']:
    encoder.addEncoder("name",
                       SDRCategoryEncoder(n=200,
                                          w=config['spNumActivePerInhArea'],
                                          categoryList=categories,
                                          name="name"))
  else:
    encoder.addEncoder("name",
                       CategoryEncoder(w=config['spNumActivePerInhArea'],
                                       categoryList=categories,
                                       name="name"))

  # ------------------------------------------------------------------
  # Node params
  # The inputs are long, horizontal vectors
  inputDimensions = (1, encoder.getWidth())

  # Layout the coincidences vertically stacked on top of each other, each
  # looking at the entire input field.
  columnDimensions = (config['spCoincCount'], 1)

  # If we have disableSpatial, then set the number of "coincidences" to be the
  # same as the encoder width
  if config['disableSpatial']:
    columnDimensions = (encoder.getWidth(), 1)
    config['trainSP'] = 0

  sensorParams = dict(
    # encoder/datasource are not parameters so don't include here
    verbosity=config['sensorVerbosity']
  )

  CLAParams = dict(
    # SP params
    disableSpatial = config['disableSpatial'],
    inputDimensions = inputDimensions,
    columnDimensions = columnDimensions,
    potentialRadius = inputDimensions[1]/2,
    potentialPct = 1.00,
    gaussianDist = 0,
    commonDistributions = 0,    # should be False if possibly not training
    localAreaDensity = -1, #0.05,
    numActiveColumnsPerInhArea = config['spNumActivePerInhArea'],
    dutyCyclePeriod = 1000,
    stimulusThreshold = 1,
    synPermInactiveDec = 0.11,
    synPermActiveInc = 0.11,
    synPermActiveSharedDec = 0.0,
    synPermOrphanDec = 0.0,
    minPctDutyCycleBeforeInh = 0.001,
    minPctDutyCycleAfterInh = 0.001,
    spVerbosity = config['spVerbosity'],
    spSeed = 1,
    printPeriodicStats = int(config['spPrintPeriodicStats']),

    # TP params
    tpSeed = 1,
    disableTemporal = 0 if config['trainTP'] else 1,
    temporalImp = config['temporalImp'],
    nCellsPerCol = config['tpNCellsPerCol'] if config['trainTP'] else 1,
    collectStats = 1,
    burnIn = 2,
    verbosity = config['tpVerbosity'],
    newSynapseCount = config['spNumActivePerInhArea'],
    minThreshold = config['spNumActivePerInhArea'],
    activationThreshold = config['spNumActivePerInhArea'],
    initialPerm = config['tpInitialPerm'],
    connectedPerm = 0.5,
    permanenceInc = config['tpPermanenceInc'],
    permanenceDec = config['tpPermanenceDec'],  # perhaps tune this
    globalDecay = config['tpGlobalDecay'],
    pamLength = config['tpPAMLength'],
    maxSeqLength = config['tpMaxSeqLength'],
    maxAge = config['tpMaxAge'],

    # General params
    computeTopDown = config['computeTopDown'],
    trainingStep = 'spatial',
    )

  dataSource = FileRecordStream(datasets['filenameTrain'])

  description = dict(
    options = dict(
      logOutputsDuringInference = False,
    ),

    network = dict(
      sensorDataSource = dataSource,
      sensorEncoder = encoder,
      sensorParams = sensorParams,

      CLAType = 'py.CLARegion',
      CLAParams = CLAParams,

      classifierType = None,
      classifierParams = None),
    )

  if config['trainSP']:
    description['spTrain'] = dict(
      iterationCount=config['iterationCountTrain'],
      #iter=displaySPCoincidences(50),
      #finish=printSPCoincidences()
      ),
  else:
    description['spTrain'] = dict(
      # need to train with one iteration just to initialize data structures
      iterationCount=1)

  if config['trainTP']:
    description['tpTrain'] = []
    for i in xrange(config['trainTPRepeats']):
      stepDict = dict(name='step_%d' % (i),
                      setup=sensorRewind,
                      iterationCount=config['iterationCountTrain'],
                     )
      if config['tpTimingEvery'] > 0:
        stepDict['iter'] = printTPTiming(config['tpTimingEvery'])
        stepDict['finish'] = [printTPTiming(), printTPCells]

      description['tpTrain'].append(stepDict)

  # ----------------------------------------------------------------------------
  # Inference tests
  inferSteps = []

  if config['evalTrainingSetNumIterations'] > 0:
    # The training set. Used to train the n-grams.
    inferSteps.append(
      dict(name = 'confidenceTrain_baseline',
           iterationCount = min(config['evalTrainingSetNumIterations'],
                                config['iterationCountTrain']),
           ppOptions = dict(verbosity=config['ppVerbosity'],
                            printLearnedCoincidences=True,
                            nGrams='train',
                            #ipsDetailsFor = "name,None,2",
                           ),
           #finish=printTPCells,
          )
      )

    # Testing the training set on both the TP and n-grams.
    inferSteps.append(
      dict(name = 'confidenceTrain_nonoise',
           iterationCount = min(config['evalTrainingSetNumIterations'],
                                config['iterationCountTrain']),
           setup = [sensorOpen(datasets['filenameTrain'])],
           ppOptions = dict(verbosity=config['ppVerbosity'],
                            printLearnedCoincidences=False,
                            nGrams='test',
                            burnIns = [1,2,3,4],
                            #ipsDetailsFor = "name,None,2",
                            #ipsAt = [1,2,3,4],
                           ),
          )
      )

  # The test set
  if True:
    if datasets['filenameTest'] != datasets['filenameTrain']:
      inferSteps.append(
        dict(name = 'confidenceTest_baseline',
             iterationCount = config['iterationCountTest'],
             setup = [sensorOpen(datasets['filenameTest'])],
             ppOptions = dict(verbosity=config['ppVerbosity'],
                              printLearnedCoincidences=False,
                              nGrams='test',
                              burnIns = [1,2,3,4],
                              #ipsAt = [1,2,3,4],
                              ipsDetailsFor = "name,None,2",
                             ),
            )
        )

  description['infer'] = inferSteps

  return description


def getDescriptionImpl(datasets, config):
  """ Implementation for description.py getDescription() entry point function.
  Builds an experiment description dictionary as required by LPF (Lightweight
  Prediction Framework).

  Hardcoded data that is less likely to vary between experiments is augmented
  with data from the config dictionary.

  See getBaseDatasets() and getDatasets().

  datasets:  a dictionary of input datasets that may have been pre-processed
             via aggregation. Keys:
             'trainDataset'          -- path to the training dataset
             'inferDataset.N.alias'  -- path(s) to the inference dataset

  config:    configuration dictionary from description.py

  returns:   an experiment description dictionary as required by LPF
  """

  # ----------------------------------------------------------------------------
  # Encoder for the sensor
  encoder = MultiEncoder(_getDatasetEncoderConfig(config))

  # ------------------------------------------------------------------
  # Region params
  CLAParams = _getCLAParams(encoder=encoder, config=config)

  sensorParams = dict(
      # encoder/datasource are not parameters so don't include here
      verbosity=config['sensorVerbosity'])

  # Filesource for the sensor. Set the filename in setup functions.
  dataSource = FileRecordStream('foo')

  description = dict(
      options=dict(logOutputsDuringInference=False,),
      network=dict(
          # Think of the sensor as a shell holding the dataSource and encoder;
          # the encoder has pre-encoder and post-encoder filters; filters
          # appear in a different place (TODO: where?)
          sensorDataSource=dataSource,
          sensorEncoder=encoder,

          # LPF converts this to JSON strings; used as constructor args; has
          # simple types (ints, strings, floats)
          sensorParams=sensorParams,

          # CLA class; the "py." prefix is for class names implemented in
          # Python; older code implemented regions in C++ and designated the
          # class name without a prefix.
          CLAType='py.CLARegion',
          # dict converted to JSON string
          CLAParams=CLAParams,

          # classifiers are presently not used (formerly used by vision code);
          # should be okay to leave out Classifier, sensor, CLA
          classifierType=None,
          classifierParams=None),
  )

  # ----------------------------------------------------------------------------
  # Configure Training and Inference phases
  # ----------------------------------------------------------------------------
  #
  # A phase is 0 or more steps (a list of dictionaries; each dict corresponds
  # to one step).  See the py/nupic/frameworks/prediction/experiment.py
  # docstring.
  #
  # step = dict(name, setup, iter, finish, iterationCount)
  #   setup, iter, finish are callbacks:
  #
  #   name:   step name string; optional, used for printing messages to console
  #   setup:  open the input file (e.g., via dataSource), print stats, etc.
  #   iter:   for diagnostics/debugging; called by net.run between iterations
  #   finish: called at the end by net.run; usually prints out stats (e.g., how
  #           many synapses, time taken, etc.)
  #
  # Callbacks are almost always reused, so they are not experiment-specific
  # (see imports at top of file).  A callback always has the form
  # c(experiment_obj, iter_number) and can get at
  # experiment.network.regions["sensor"].getSelf()

  spEnable = config['spEnable']
  spTrain = _isSPTrainingEnabled(config)

  tpEnable = config['tpEnable']
  tpTrain = _isTPTrainingEnabled(config)

  # NOTE: presently, we always train TP (during training phase) if TP is enabled
  assert (tpTrain == tpEnable)

  # At least one of SP/TP must be enabled for a meaningful system
  assert (spEnable or tpEnable)

  # NOTE: SP and spatial regression need to undergo training over the same set
  #  of rows.  Since we're not reading the training dataset here to find out
  #  the number of rows, we presently configure both with the same auto-rewind
  #  setting.
  # TODO: this may cause knn training to repeatedly iterate unnecessarily over
  #  the same records in case spTrainIterationCount is larger than the number
  #  of rows in the training dataset.  Look into optimizing this to avoid
  #  wasting time on knn training due to unnecessary iterations, but make sure
  #  that both SP and knn train on the exact same rows.
  spTrainMayNeedAutoRewind = True \
    if config['spTrainIterationCount'] is not None \
    else False

  # ----------------------------------------------------------------------------
  # SP training
  if spTrain:
    description['spTrain'] = []
    for i in xrange(config['spTrainNPasses']):
      stepDict = dict(
          name='sp.train.pass_%d' % (i),
          iterationCount=config['spTrainIterationCount'],
          setup=[sensorOpen(datasets[_getTrainingDatasetKey(config)]) if i==0 \
                   else sensorRewind,
                 fileSourceAutoRewind(spTrainMayNeedAutoRewind),],
          finish=[fileSourceAutoRewind(False),],
      )
      description['spTrain'].append(stepDict)

  elif spEnable:
    description['spTrain'] = dict(
        # need to train with one iteration just to initialize data structures
        # TODO: seems like a hack; shouldn't the CLA framework automatically
        #  initialize the necessary subsystems? (ask Ron)
        iterationCount=1,
    )

  # ----------------------------------------------------------------------------
  # TP training
  if tpTrain:
    description['tpTrain'] = []
    mayNeedAutoRewind = True if config['tpTrainIterationCount'] is not None \
                        else False
    for i in xrange(config['tpTrainNPasses']):
      stepDict = dict(
          name='tp.train.pass_%d' % (i),
          iterationCount=config['tpTrainIterationCount'],
          setup=[
              sensorOpen(datasets[_getTrainingDatasetKey(config)]) if i==0 \
                else sensorRewind,
              fileSourceAutoRewind(mayNeedAutoRewind),
          ],
          finish=[fileSourceAutoRewind(False),],
      )
      if config['tpTrainPrintStatsPeriodIter'] > 0:
        stepDict['iter'] = printTPTiming(config['tpTrainPrintStatsPeriodIter'])
        stepDict['finish'] += [printTPTiming()]  #, printTPCells]

      description['tpTrain'].append(stepDict)

  # ----------------------------------------------------------------------------
  # Inference tests
  # NOTE: Presently, SP and TP learning is disabled during inference

  description['infer'] = []

  predictionFields = None
  spatialRegrTests = None
  if 'spFieldPredictionSchema' in config and \
      config['spFieldPredictionSchema'] != None:
    if len(config['spFieldPredictionSchema']['predictionFields']) > 0:
      spFieldPredictionSchema = config['spFieldPredictionSchema']
      predictionFields = spFieldPredictionSchema['predictionFields']
      if len(spFieldPredictionSchema['regressionTests']) > 0:
        # presently, our spatial regression modules (knn and linear) don't
        # support multiple fields
        assert (len(predictionFields) == 1)
        spatialRegrTests = spFieldPredictionSchema['regressionTests']

  # Set up test steps for all inference datasets
  for i, ds in enumerate(config['inferDatasets']):
    datasetInfo = config['inferDatasets'][i]

    # NOTE: the path/contents may differ from the corresponding dataset
    #  referenced in config['inferDatasets'] due to preprocessing (e.g.,
    #  aggregation)
    inferenceDatasetKey = \
      _datasetKeyFromInferenceDatasetIndex(index=i, config=config)
    inferenceDatasetPath = datasets[inferenceDatasetKey]

    # ----------------------------------------
    # Step: Temporal inference
    #
    if tpEnable:
      # Turn off plot histograms when running under darwin
      plotTemporalHistograms = True
      if sys.platform.startswith('darwin'):
        plotTemporalHistograms = False
        print "Turning off plotTemporalHistograms under darwin"

      temporalTestingStep = dict(
          name=getTemporalInferenceStepName(datasetInfo['alias'], i),
          iterationCount=ds['iterCount'],
          setup=[sensorOpen(inferenceDatasetPath)],
          ppOptions=dict(
              verbosity=config['postprocVerbosity'],
              plotTemporalHistograms=plotTemporalHistograms,
              printLearnedCoincidences=False,
              logPredictions=True,
          ))
      description['infer'].append(temporalTestingStep)
    else:
      print 'temporalTestingStep skipped.'

    # ----------------------------------------
    # Step: Non-temporal regression algorithm training (if enabled)
    #
    if spatialRegrTests:
      # NOTE: we don't need auto-rewind when training spatial regression
      #  algorithms
      regrTrainStep = dict(
          name=('%s_nontemporal.training') % \
               (_normalizeDatasetAliasNameForStepName(datasetInfo['alias']),),
          iterationCount=config['spTrainIterationCount'],
          setup=[sensorOpen(datasets[_getTrainingDatasetKey(config)]),
                 fileSourceAutoRewind(spTrainMayNeedAutoRewind),],
          ppOptions=dict(verbosity=config['postprocVerbosity'],
                         printLearnedCoincidences=False,)
      )
      # Add spatial regression algorithm training requests
      ppOptions = regrTrainStep['ppOptions']
      for test in spatialRegrTests:
        assert (len(predictionFields) == 1)
        ppOptions[test['algorithm']] = 'train,%s' % (predictionFields[0])

      description['infer'].append(regrTrainStep)

    # ----------------------------------------
    # Step: Non-temporal inference
    #
    nontemporalTestingStep = dict(
        name=getNonTemporalInferenceStepName(datasetInfo['alias'], i),
        iterationCount=ds['iterCount'],
        setup=[
            sensorOpen(inferenceDatasetPath),
            fileSourceAutoRewind(False),
            # TODO Do we need to turn off collectStats in the 'finish' sub-step?
            setTPAttribute('collectStats', 1),
        ],
        # TODO which ppOptions do we want in this template?
        ppOptions=dict(
            verbosity=config['postprocVerbosity'],
            plotTemporalHistograms=False,
            printLearnedCoincidences=False,
            logPredictions=True,
        ),
    )

    # Add spatial field prediction options to the inference step
    if predictionFields:
      # Set sparse encodings of prediction fields to zero
      setup = nontemporalTestingStep['setup']
      setup.append(
          setAttribute('sensor', 'postEncodingFilters', [
              ModifyFields(fields=predictionFields, operation='setToZero')
          ]))

      if spatialRegrTests:
        # Add regression test requests
        ppOptions = nontemporalTestingStep['ppOptions']
        for test in spatialRegrTests:
          assert (len(predictionFields) == 1)
          ppOptions[test['algorithm']] = 'test,%s' % (predictionFields[0])

    description['infer'].append(nontemporalTestingStep)

  # ----------------------------------------------------------------------------
  # Add auto-reset intervals to the sensor region for tpTrain and infer phases
  # (if config['sensorAutoReset'] is enabled)
  # ----------------------------------------------------------------------------
  if 'sensorAutoReset' in config and config['sensorAutoReset'] is not None:
    dd = defaultdict(lambda: 0, config['sensorAutoReset'])
    # class timedelta([days[, seconds[, microseconds[, milliseconds[, minutes[,
    #                 hours[, weeks]]]]]]])
    if not (0 == dd['days'] == dd['hours'] == dd['minutes'] == dd['seconds'] \
            == dd['milliseconds'] == dd['microseconds'] == dd['weeks']):
      timeDelta = timedelta(days=dd['days'],
                            hours=dd['hours'],
                            minutes=dd['minutes'],
                            seconds=dd['seconds'],
                            milliseconds=dd['milliseconds'],
                            microseconds=dd['microseconds'],
                            weeks=dd['weeks'])

      tpTrainSteps = description['tpTrain'] if 'tpTrain' in description else []
      inferSteps = description['infer'] if 'infer' in description else []
      for step in itertools.chain(tpTrainSteps, inferSteps):
        if 'setup' not in step:
          step['setup'] = []
        step['setup'].append(setAutoResetInterval(timeDelta))

  return description
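

# ----------------------------------------------------------------------------
# Sketch only (hypothetical values): a 'sensorAutoReset' config entry is a dict
# of timedelta keyword arguments.  With the entry below, the block above would
# append setAutoResetInterval(timedelta(hours=1, minutes=30)) to the 'setup'
# callbacks of every tpTrain and infer step.
_exampleSensorAutoReset = dict(hours=1, minutes=30)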