def reset(self, params, repetition):
  random.seed(params['seed'])

  if params['encoding'] == 'basic':
    self.encoder = BasicEncoder(params['encoding_num'])
  elif params['encoding'] == 'distributed':
    self.encoder = DistributedEncoder(params['encoding_num'],
                                      maxValue=params['encoding_max'],
                                      minValue=params['encoding_min'],
                                      classifyWithRandom=params['classify_with_random'])
  else:
    raise Exception("Encoder not found")

  if params['dataset'] == 'simple':
    self.dataset = SimpleDataset()
  elif params['dataset'] == 'reber':
    self.dataset = ReberDataset(maxLength=params['max_length'])
  elif params['dataset'] == 'high-order':
    self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
  else:
    raise Exception("Dataset not found")

  self.computeCounter = 0
  self.history = []
  self.resets = []
  self.randoms = []
  self.currentSequence = self.dataset.generateSequence()
  self.net = None
  self.sequenceCounter = 0
def reset(self, params, repetition):
  random.seed(params['seed'])

  if params['dataset'] == 'simple':
    self.dataset = SimpleDataset()
  elif params['dataset'] == 'reber':
    self.dataset = ReberDataset(maxLength=params['max_length'])
  elif params['dataset'] == 'high-order':
    self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                    seed=params['seed'])
    print "Sequence dataset: "
    print " Symbol Number {}".format(self.dataset.numSymbols)
    for seq in self.dataset.sequences:
      print seq
  elif params['dataset'] == 'high-order-long':
    self.dataset = LongHighOrderDataset(params['sequence_length'],
                                        seed=params['seed'])
    print "Sequence dataset: "
    print " Symbol Number {}".format(self.dataset.numSymbols)
    for seq in self.dataset.sequences:
      print seq
  else:
    raise Exception("Dataset not found")

  self.randomStart = self.dataset.numSymbols + 1
  self.randomEnd = self.randomStart + 5000

  MODEL_PARAMS['modelParams']['sensorParams']['encoders']['element']\
    ['categoryList'] = range(self.randomEnd)

  if params['verbosity'] > 0:
    print " initializing HTM model..."
  self.model = ModelFactory.create(MODEL_PARAMS)
  self.model.enableInference({"predictedField": "element"})

  self.mapping = getEncoderMapping(self.model, self.dataset.numSymbols)

  self.numPredictedActiveCells = []
  self.numPredictedInactiveCells = []
  self.numUnpredictedActiveColumns = []

  self.currentSequence = []
  self.targetPrediction = []
  self.replenish_sequence(params, iteration=0)

  self.resets = []
  self.randoms = []
  self.verbosity = 1
  self.sequenceCounter = 0
def reset(self, params, repetition):
  random.seed(params['seed'])

  if params['dataset'] == 'simple':
    self.dataset = SimpleDataset()
  elif params['dataset'] == 'reber':
    self.dataset = ReberDataset(maxLength=params['max_length'])
  elif params['dataset'] == 'high-order':
    self.dataset = HighOrderDataset(
      numPredictions=params['num_predictions'],
      seed=params['seed'],
      smallAlphabet=params['use_small_alphabet'])
    print "Sequence dataset: "
    print " Symbol Number {}".format(self.dataset.numSymbols)
    for seq in self.dataset.sequences:
      print seq
  elif params['dataset'] == 'high-order-long':
    self.dataset = LongHighOrderDataset(params['sequence_length'],
                                        seed=params['seed'])
    print "Sequence dataset: "
    print " Symbol Number {}".format(self.dataset.numSymbols)
    for seq in self.dataset.sequences:
      print seq
  else:
    raise Exception("Dataset not found")

  self.randomStart = self.dataset.numSymbols + 1
  self.randomEnd = self.randomStart + 5000

  MODEL_PARAMS['modelParams']['sensorParams']['encoders']['element']\
    ['categoryList'] = range(self.randomEnd)

  if params['verbosity'] > 0:
    print " initializing HTM model..."
  self.model = ModelFactory.create(MODEL_PARAMS)
  self.model.enableInference({"predictedField": "element"})
  print "finish initializing HTM model "

  if params['kill_cell_percent'] > 0:
    # a hack to use faulty temporal memory instead
    self.model._getTPRegion().getSelf()._tfdr = MonitoredFaultyTPShim(
      numberOfCols=2048,
      cellsPerColumn=32,
      newSynapseCount=32,
      maxSynapsesPerSegment=128,
      maxSegmentsPerCell=128,
      initialPerm=0.21,
      connectedPerm=0.50,
      permanenceInc=0.10,
      permanenceDec=0.10,
      predictedSegmentDecrement=0.01,
      minThreshold=15,
      activationThreshold=15,
      seed=1960,
    )

  self.mapping = getEncoderMapping(self.model, self.dataset.numSymbols)

  self.numPredictedActiveCells = []
  self.numPredictedInactiveCells = []
  self.numUnpredictedActiveColumns = []

  self.currentSequence = []
  self.targetPrediction = []
  self.replenish_sequence(params, iteration=0)

  self.resets = []
  self.randoms = []
  self.verbosity = 1
  self.sequenceCounter = 0
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'],
                                        classifyWithRandom=params['classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    self.computeCounter = 0
    self.history = []
    self.resets = []
    self.randoms = []
    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)
    self.net = None
    self.sequenceCounter = 0

  def window(self, data, params):
    start = max(0, len(data) - params['learning_window'])
    return data[start:]

  def train(self, params):
    """
    Train an LSTM network on the buffered dataset history.
    After training, run the LSTM over history[:-1] so the internal state
    is correct before the next prediction.
    """
    n = params['encoding_num']
    net = buildNetwork(n, params['num_cells'], n,
                       hiddenclass=LSTMLayer,
                       bias=True,
                       outputbias=params['output_bias'],
                       recurrent=True)
    net.reset()

    # prepare training dataset
    ds = SequentialDataSet(n, n)
    trainer = RPropMinusTrainer(net, dataset=ds,
                                verbose=params['verbosity'] > 0)

    history = self.window(self.history, params)
    resets = self.window(self.resets, params)

    for i in xrange(1, len(history)):
      if not resets[i - 1]:
        ds.addSample(self.encoder.encode(history[i - 1]),
                     self.encoder.encode(history[i]))
      if resets[i]:
        ds.newSequence()

    if len(history) > 1:
      trainer.trainEpochs(params['num_epochs'])

    # run network on buffered dataset after training to get the state right
    net.reset()
    for i in xrange(len(history) - 1):
      symbol = history[i]
      output = net.activate(self.encoder.encode(symbol))
      predictions = self.encoder.classify(output, num=params['num_predictions'])
      if resets[i]:
        net.reset()

    return net

  def killCells(self, killCellPercent):
    """
    Kill a fraction of LSTM cells from the network.
    """
    if killCellPercent <= 0:
      return

    inputLayer = self.net['in']
    lstmLayer = self.net['hidden0']

    numLSTMCell = lstmLayer.outdim
    # round() returns a float in Python 2; cast so it can index arrays
    numDead = int(round(killCellPercent * numLSTMCell))
    zombiePermutation = numpy.random.permutation(numLSTMCell)
    deadCells = zombiePermutation[0:numDead]

    # remove connections from input layer to dead LSTM cells
    connectionInputToHidden = self.net.connections[inputLayer][0]
    weightInputToHidden = reshape(connectionInputToHidden.params,
                                  (connectionInputToHidden.outdim,
                                   connectionInputToHidden.indim))
    for cell in deadCells:
      for dim in range(4):
        weightInputToHidden[dim * numLSTMCell + cell, :] *= 0

    newParams = reshape(weightInputToHidden,
                        (connectionInputToHidden.paramdim,))
    self.net.connections[inputLayer][0]._setParameters(
      newParams, connectionInputToHidden.owner)

    # remove dead connections within LSTM layer
    connectionHiddenToHidden = self.net.recurrentConns[0]
    weightHiddenToHidden = reshape(connectionHiddenToHidden.params,
                                   (connectionHiddenToHidden.outdim,
                                    connectionHiddenToHidden.indim))
    for cell in deadCells:
      weightHiddenToHidden[:, cell] *= 0

    newParams = reshape(weightHiddenToHidden,
                        (connectionHiddenToHidden.paramdim,))
    self.net.recurrentConns[0]._setParameters(
      newParams, connectionHiddenToHidden.owner)

    # remove connections from dead LSTM cells to output layer
    connectionHiddenToOutput = self.net.connections[lstmLayer][0]
    weightHiddenToOutput = reshape(connectionHiddenToOutput.params,
                                   (connectionHiddenToOutput.outdim,
                                    connectionHiddenToOutput.indim))
    for cell in deadCells:
      weightHiddenToOutput[:, cell] *= 0

    newParams = reshape(weightHiddenToOutput,
                        (connectionHiddenToOutput.paramdim,))
    self.net.connections[lstmLayer][0]._setParameters(
      newParams, connectionHiddenToOutput.owner)

  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(iteration)

    if (iteration > params['inject_noise_after'] and
        iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol())
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target

  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct

  def iterate(self, params, repetition, iteration):
    element = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(element)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')
    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    # reset compute counter
    if iteration % params['compute_every'] == 0:
      self.computeCounter = params['compute_for']

    if self.computeCounter == 0 or iteration < params['compute_after']:
      computeLSTM = False
    else:
      computeLSTM = True

    if computeLSTM:
      self.computeCounter -= 1

      train = (not params['compute_test_mode'] or
               iteration % params['compute_every'] == 0)
      if train:
        if params['verbosity'] > 0:
          print "Training LSTM at iteration {}".format(iteration)
        self.net = self.train(params)

      # run LSTM on the latest data record
      output = self.net.activate(self.encoder.encode(element))

      predictions = self.encoder.classify(output,
                                          num=params['num_predictions'])

      correct = self.check_prediction(predictions, target)

      if params['verbosity'] > 0:
        print ("iteration: {0} \t"
               "current: {1} \t"
               "predictions: {2} \t"
               "truth: {3} \t"
               "correct: {4} \t").format(
          iteration, element, predictions, target, correct)

      if self.resets[-1]:
        if params['verbosity'] > 0:
          print "Reset LSTM at iteration {}".format(iteration)
        self.net.reset()

      return {"current": element,
              "reset": self.resets[-1],
              "random": self.randoms[-1],
              "train": train,
              "predictions": predictions,
              "truth": target,
              "killCell": killCell,
              "sequenceCounter": self.sequenceCounter}
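# --- Hedged sketch (not part of the original suite) -------------------------
# killCells() above relies on PyBrain's LSTMLayer weight layout: the
# input-to-hidden parameter matrix stacks four gate blocks (input gate,
# forget gate, cell input, output gate), each numLSTMCell rows tall, so
# silencing cell `c` means zeroing row `dim * numLSTMCell + c` in every gate
# block, plus its column in the recurrent and output matrices. The toy numpy
# demo below shows just that row indexing; the names are illustrative, not
# from the original code.
import numpy

def zeroDeadCellRows(weightInputToHidden, deadCells, numLSTMCell):
  # zero the four gate-block rows belonging to each dead cell
  for cell in deadCells:
    for dim in range(4):
      weightInputToHidden[dim * numLSTMCell + cell, :] = 0
  return weightInputToHidden

W = numpy.ones((4 * 3, 5))        # toy (4H x I) weight matrix, H=3, I=5
zeroDeadCellRows(W, deadCells=[1], numLSTMCell=3)
print W                           # rows 1, 4, 7, 10 are now all zero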
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        params['encoding_num_non_random'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'])
    elif params['encoding'] == 'sparse-distributed':
      self.encoder = SparseDistributedEncoder(params['encoding_num'],
                                              params['encoding_num_non_random'],
                                              params['encoding_active_bits'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.numLags = params['num_lags']
    self.history = []
    self.resets = []
    self.finishInitializeX = False
    self.randoms = []
    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = initializeELMnet(params['encoding_num'] * params['num_lags'],
                                params['encoding_num'],
                                numNeurons=params['num_cells'])

    self.sequenceCounter = 0

  def window(self, data, windowSize):
    start = max(0, len(data) - windowSize)
    return data[start:]

  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(params['seed'] + iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(params['seed'] + iteration)

    if (iteration > params['inject_noise_after'] and
        iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol(seed=params['seed'] + iteration))
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target

  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct

  def killCells(self, killCellPercent):
    """
    Kill a fraction of hidden neurons from the ELM network.
    """
    if killCellPercent <= 0:
      return

    numHiddenNeurons = self.net.numHiddenNeurons
    # round() returns a float in Python 2; cast so it can index arrays
    numDead = int(round(killCellPercent * numHiddenNeurons))
    zombiePermutation = numpy.random.permutation(numHiddenNeurons)
    deadCells = zombiePermutation[0:numDead]
    liveCells = zombiePermutation[numDead:]

    self.net.inputWeights = self.net.inputWeights[liveCells, :]
    self.net.bias = self.net.bias[:, liveCells]
    self.net.beta = self.net.beta[liveCells, :]
    # numpy.ix_ keeps the full submatrix of surviving rows and columns;
    # plain M[liveCells, liveCells] would only pick paired elements
    self.net.M = self.net.M[numpy.ix_(liveCells, liveCells)]
    self.net.numHiddenNeurons = numHiddenNeurons - numDead

  def iterate(self, params, repetition, iteration):
    currentElement = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(currentElement)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')
    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    if iteration > params['train_after']:
      n = params['encoding_num']
      if self.finishInitializeX is False:
        # run initialization phase of OS-ELM
        NT = params['train_after']
        features = numpy.zeros(shape=(NT, n * params['num_lags']))
        targets = numpy.zeros(shape=(NT, n))

        history = self.window(self.history, NT)
        for i in range(params['num_lags'], NT):
          targets[i, :] = self.encoder.encode(history[i])

        for lags in xrange(params['num_lags']):
          shiftTargets = numpy.roll(targets, lags, axis=0)
          shiftTargets[:lags, :] = 0
          features[:, lags * n:(lags + 1) * n] = shiftTargets

        self.net.initializePhase(features[:, :], targets[:, :])
        self.finishInitializeX = True
      else:
        # run sequential learning phase
        targets = numpy.zeros((1, params['encoding_num']))
        targets[0, :] = self.encoder.encode(self.history[-1])

        features = numpy.zeros((1, params['encoding_num'] * params['num_lags']))
        for lags in xrange(params['num_lags']):
          features[0, lags * n:(lags + 1) * n] = self.encoder.encode(
            self.history[-1 - (lags + 1)])

        if iteration < params['stop_training_after']:
          self.net.train(features, targets)

    # run ELM on the latest data record
    n = params['encoding_num']
    currentFeatures = numpy.zeros((1, params['encoding_num'] * params['num_lags']))
    for lags in xrange(min(params['num_lags'], iteration)):
      currentFeatures[0, lags * n:(lags + 1) * n] = self.encoder.encode(
        self.history[-1 - lags])

    output = self.net.predict(currentFeatures)
    predictions = self.encoder.classify(output[0], num=params['num_predictions'])

    correct = self.check_prediction(predictions, target)

    if params['verbosity'] > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        iteration, currentElement, predictions, target, correct)

    return {"current": currentElement,
            "reset": self.resets[-1],
            "random": self.randoms[-1],
            "predictions": predictions,
            "truth": target,
            "killCell": killCell,
            "sequenceCounter": self.sequenceCounter}
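# --- Hedged sketch (not part of the original suite) -------------------------
# The OS-ELM initialization above builds its lagged feature matrix with
# numpy.roll: column block `lags` holds the encodings shifted down by `lags`
# time steps, with the wrapped-around top rows zeroed out. A minimal
# stand-alone demonstration with made-up sizes:
import numpy

n, numLags, NT = 2, 3, 5
targets = numpy.arange(NT * n, dtype=float).reshape(NT, n)  # toy encodings
features = numpy.zeros((NT, n * numLags))
for lags in xrange(numLags):
  shiftTargets = numpy.roll(targets, lags, axis=0)  # shift rows down by `lags`
  shiftTargets[:lags, :] = 0                        # zero the wrap-around rows
  features[:, lags * n:(lags + 1) * n] = shiftTargets
# row t of `features` is [x_t, x_{t-1}, x_{t-2}] flattened, zero-padded at
# the start of the history
print features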
class Runner(object):

  def __init__(self, numPredictions, resultsDir):
    random.seed(43)

    self.numPredictions = numPredictions

    if not os.path.exists(resultsDir):
      os.makedirs(resultsDir)

    self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')

    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    self.shifter = InferenceShifter()
    self.mapping = getEncoderMapping(self.model)

    self.correct = []
    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.iteration = 0
    self.perturbed = False
    self.randoms = []
    self.verbosity = 1

    self.dataset = HighOrderDataset(numPredictions=self.numPredictions)
    self.sequences = []
    self.currentSequence = []
    self.replenish_sequence()

  def replenish_sequence(self):
    if self.iteration > PERTURB_AFTER and not self.perturbed:
      print "PERTURBING"
      sequence, target = self.dataset.generateSequence(self.iteration,
                                                       perturbed=True)
      self.perturbed = True
    else:
      sequence, target = self.dataset.generateSequence(self.iteration)

    if self.iteration > TEMPORAL_NOISE_AFTER:
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = random.randrange(RANDOM_START, RANDOM_END)

    # append noise element at end of sequence
    random.seed(self.iteration)
    print "seed {} start {} end {}".format(self.iteration, RANDOM_START,
                                           RANDOM_END)
    sequence.append(random.randrange(RANDOM_START, RANDOM_END))

    print "next sequence: ", sequence
    self.currentSequence += sequence

  def step(self):
    element = self.currentSequence.pop(0)

    randomFlag = (len(self.currentSequence) == 1)
    self.randoms.append(randomFlag)

    result = self.shifter.shift(self.model.run({"element": element}))
    tm = self.model._getTPRegion().getSelf()._tfdr
    tm.mmClearHistory()

    # use custom classifier (uses predicted cells to make predictions)
    predictiveColumns = set([tm.columnForCell(cell)
                             for cell in tm.predictiveCells])
    topPredictions = classify(self.mapping, predictiveColumns,
                              self.numPredictions)

    truth = None if (self.randoms[-1] or
                     len(self.randoms) >= 2 and self.randoms[-2]) \
      else self.currentSequence[0]
    correct = None if truth is None else (truth in topPredictions)

    data = {"iteration": self.iteration,
            "current": element,
            "reset": False,
            "random": randomFlag,
            "train": True,
            "predictions": topPredictions,
            "truth": truth}

    self.resultsFile.write(json.dumps(data) + '\n')
    self.resultsFile.flush()

    if self.verbosity > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(self.iteration, element,
                                       topPredictions, truth, correct)

    # replenish sequence
    if len(self.currentSequence) == 0:
      self.replenish_sequence()

    self.iteration += 1
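# --- Hedged sketch (not part of the original class) --------------------------
# The truth-masking line in step() hides the ground truth around the random
# separator: if the NEXT element is the separator (randoms[-1]) or the CURRENT
# element is one (randoms[-2] on the following step), the upcoming symbol is
# unpredictable and truth becomes None, so the step is excluded from accuracy.
# Since `and` binds tighter than `or`, the condition parses as
# randoms[-1] or (len(randoms) >= 2 and randoms[-2]). Toy trace:
randoms = [False, True]   # the next element is the random separator
nextElement = 42
truth = None if (randoms[-1] or
                 len(randoms) >= 2 and randoms[-2]) else nextElement
print truth               # None: predictions around a separator are not scored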
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(
        params['encoding_num'],
        maxValue=params['encoding_max'],
        minValue=params['encoding_min'],
        classifyWithRandom=params['classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(
        numPredictions=params['num_predictions'],
        seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.computeCounter = 0
    self.history = []
    self.resets = []
    self.randoms = []
    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = buildNetwork(params['encoding_num'],
                            params['num_cells'],
                            params['encoding_num'],
                            hiddenclass=LSTMLayer,
                            bias=True,
                            outputbias=params['output_bias'],
                            recurrent=True)

    self.trainer = BackpropTrainer(
      self.net,
      dataset=SequentialDataSet(params['encoding_num'],
                                params['encoding_num']),
      learningrate=0.01,
      momentum=0,
      verbose=params['verbosity'] > 0)

    self.sequenceCounter = 0

  def window(self, data, params):
    start = max(0, len(data) - params['learning_window'])
    return data[start:]

  def train(self, params):
    """
    Train the LSTM network on the buffered dataset history.
    After training, run the LSTM over history[:-1] so the internal state
    is correct before the next prediction.
    """
    if params['reset_every_training']:
      n = params['encoding_num']
      self.net = buildNetwork(n, params['num_cells'], n,
                              hiddenclass=LSTMLayer,
                              bias=True,
                              outputbias=params['output_bias'],
                              recurrent=True)
      self.net.reset()

    # prepare training dataset
    ds = SequentialDataSet(params['encoding_num'], params['encoding_num'])
    history = self.window(self.history, params)
    resets = self.window(self.resets, params)

    for i in xrange(1, len(history)):
      if not resets[i - 1]:
        ds.addSample(self.encoder.encode(history[i - 1]),
                     self.encoder.encode(history[i]))
      if resets[i]:
        ds.newSequence()

    print "Train LSTM network on buffered dataset of length ", len(history)
    if params['num_epochs'] > 1:
      # batch retraining with RProp-
      trainer = RPropMinusTrainer(self.net, dataset=ds,
                                  verbose=params['verbosity'] > 0)
      if len(history) > 1:
        trainer.trainEpochs(params['num_epochs'])
    else:
      # single-epoch online training with the persistent backprop trainer
      self.trainer.setData(ds)
      self.trainer.train()

    # run network on buffered dataset after training to get the state right
    self.net.reset()
    for i in xrange(len(history) - 1):
      symbol = history[i]
      output = self.net.activate(self.encoder.encode(symbol))
      self.encoder.classify(output, num=params['num_predictions'])
      if resets[i]:
        self.net.reset()

  def killCells(self, killCellPercent):
    """
    Kill a fraction of LSTM cells from the network.
    """
    if killCellPercent <= 0:
      return

    inputLayer = self.net['in']
    lstmLayer = self.net['hidden0']

    numLSTMCell = lstmLayer.outdim
    # round() returns a float in Python 2; cast so it can index arrays
    numDead = int(round(killCellPercent * numLSTMCell))
    zombiePermutation = numpy.random.permutation(numLSTMCell)
    deadCells = zombiePermutation[0:numDead]

    # remove connections from input layer to dead LSTM cells
    connectionInputToHidden = self.net.connections[inputLayer][0]
    weightInputToHidden = reshape(
      connectionInputToHidden.params,
      (connectionInputToHidden.outdim, connectionInputToHidden.indim))
    for cell in deadCells:
      for dim in range(4):
        weightInputToHidden[dim * numLSTMCell + cell, :] *= 0

    newParams = reshape(weightInputToHidden,
                        (connectionInputToHidden.paramdim, ))
    self.net.connections[inputLayer][0]._setParameters(
      newParams, connectionInputToHidden.owner)

    # remove dead connections within LSTM layer
    connectionHiddenToHidden = self.net.recurrentConns[0]
    weightHiddenToHidden = reshape(
      connectionHiddenToHidden.params,
      (connectionHiddenToHidden.outdim, connectionHiddenToHidden.indim))
    for cell in deadCells:
      weightHiddenToHidden[:, cell] *= 0

    newParams = reshape(weightHiddenToHidden,
                        (connectionHiddenToHidden.paramdim, ))
    self.net.recurrentConns[0]._setParameters(
      newParams, connectionHiddenToHidden.owner)

    # remove connections from dead LSTM cells to output layer
    connectionHiddenToOutput = self.net.connections[lstmLayer][0]
    weightHiddenToOutput = reshape(
      connectionHiddenToOutput.params,
      (connectionHiddenToOutput.outdim, connectionHiddenToOutput.indim))
    for cell in deadCells:
      weightHiddenToOutput[:, cell] *= 0

    newParams = reshape(weightHiddenToOutput,
                        (connectionHiddenToOutput.paramdim, ))
    self.net.connections[lstmLayer][0]._setParameters(
      newParams, connectionHiddenToOutput.owner)

  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(params['seed'] + iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(params['seed'] + iteration)

    if (iteration > params['inject_noise_after'] and
        iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(
        self.encoder.randomSymbol(seed=params['seed'] + iteration))
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target

  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct

  def iterate(self, params, repetition, iteration):
    currentElement = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(currentElement)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')
    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    # reset compute counter
    if iteration > 0 and iteration % params['compute_every'] == 0:
      self.computeCounter = params['compute_for']

    if self.computeCounter == 0 or iteration < params['compute_after']:
      computeLSTM = False
    else:
      computeLSTM = True

    if computeLSTM:
      self.computeCounter -= 1

      train = (not params['compute_test_mode'] or
               iteration % params['compute_every'] == 0)
      if train:
        if params['verbosity'] > 0:
          print "Training LSTM at iteration {}".format(iteration)
        self.train(params)

      # run LSTM on the latest data record
      output = self.net.activate(self.encoder.encode(currentElement))

      # basic and distributed encoders expose the same classify() interface
      predictions = self.encoder.classify(output,
                                          num=params['num_predictions'])

      correct = self.check_prediction(predictions, target)

      if params['verbosity'] > 0:
        print ("iteration: {0} \t"
               "current: {1} \t"
               "predictions: {2} \t"
               "truth: {3} \t"
               "correct: {4} \t").format(iteration, currentElement,
                                         predictions, target, correct)

      if self.resets[-1]:
        if params['verbosity'] > 0:
          print "Reset LSTM at iteration {}".format(iteration)
        self.net.reset()

      return {"iteration": iteration,
              "current": currentElement,
              "reset": self.resets[-1],
              "random": self.randoms[-1],
              "train": train,
              "predictions": predictions,
              "truth": target,
              "killCell": killCell,
              "sequenceCounter": self.sequenceCounter}
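# --- Hedged sketch (not part of the original suite) -------------------------
# check_prediction() treats a numeric target as "hit if it appears among the
# top predictions" and a list target as "hit only if every top prediction is
# an allowed continuation"; a None target (the random separator) yields None
# so the trial is excluded from accuracy. A compact equivalent with toy
# values; the helper name is illustrative:
import numbers

def checkPrediction(topPredictions, targets):
  if targets is None:
    return None
  if isinstance(targets, numbers.Number):
    return targets in topPredictions
  return all(p in targets for p in topPredictions)

print checkPrediction([3, 7], 3)          # True: single target among top-2
print checkPrediction([3, 7], [3, 7, 9])  # True: every prediction is allowed
print checkPrediction([3, 8], [3, 7, 9])  # False: 8 is not a valid branch
print checkPrediction([3, 7], None)       # None: separator, not scored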
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    self.randomStart = self.dataset.numSymbols + 1
    self.randomEnd = self.randomStart + 5000

    MODEL_PARAMS['modelParams']['sensorParams']['encoders']['element']\
      ['categoryList'] = range(self.randomEnd)

    if params['verbosity'] > 0:
      print " initializing HTM model..."
    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    self.shifter = InferenceShifter()
    self.mapping = getEncoderMapping(self.model)

    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenish_sequence(params, iteration=0)

    self.randoms = []
    self.verbosity = 1
    self.sequenceCounter = 0

  def replenish_sequence(self, params, iteration):
    if iteration > params['perturb_after']:
      print "PERTURBING"
      sequence, target = self.dataset.generateSequence(iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(iteration)

    if (iteration > params['inject_noise_after'] and
        iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = random.randrange(self.randomStart,
                                                 self.randomEnd)
      if params['verbosity'] > 0:
        print "injectNoise ", sequence[injectNoiseAt], " at: ", injectNoiseAt

    # separate sequences with random elements
    random.seed(iteration)
    print "seed {} start {} end {}".format(iteration, self.randomStart,
                                           self.randomEnd)
    sequence.append(random.randrange(self.randomStart, self.randomEnd))
    target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target

  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        # single target, multiple predictions
        correct = targets in topPredictions
      else:
        # multiple targets, multiple predictions
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct

  def iterate(self, params, repetition, iteration):
    element = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1)
    self.randoms.append(randomFlag)

    result = self.shifter.shift(self.model.run({"element": element}))
    tm = self.model._getTPRegion().getSelf()._tfdr
    tm.mmClearHistory()

    # use custom classifier (uses predicted cells to make predictions)
    predictiveColumns = set([tm.columnForCell(cell)
                             for cell in tm.predictiveCells])
    topPredictions = classify(self.mapping, predictiveColumns,
                              params['num_predictions'])

    truth = None if (self.randoms[-1] or
                     len(self.randoms) >= 2 and self.randoms[-2]) \
      else self.currentSequence[0]
    correct = None if truth is None else (truth in topPredictions)

    data = {"iteration": iteration,
            "current": element,
            "reset": False,
            "random": randomFlag,
            "train": True,
            "predictions": topPredictions,
            "truth": target,
            "sequenceCounter": self.sequenceCounter}

    if params['verbosity'] > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        iteration, element, topPredictions, target, correct)

    if len(self.currentSequence) == 0:
      self.replenish_sequence(params, iteration)
      self.sequenceCounter += 1

    return data
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    if params['verbosity'] > 0:
      print " initializing HTM model..."
    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    self.shifter = InferenceShifter()
    self.mapping = getEncoderMapping(self.model)

    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.currentSequence = self.dataset.generateSequence()
    self.perturbed = False
    self.randoms = []
    self.verbosity = 1
    self.sequenceCounter = 0

  def replenish_sequence(self, params, iteration):
    if iteration > params['perturb_after'] and not self.perturbed:
      print "PERTURBING"
      sequence = self.dataset.generateSequence(perturbed=True)
      self.perturbed = True
    else:
      sequence = self.dataset.generateSequence()

    if iteration > params['inject_noise_after']:
      injectNoiseAt = random.randint(1, 3)
      print "injectNoiseAt: ", injectNoiseAt
      sequence[injectNoiseAt] = random.randrange(RANDOM_START, RANDOM_END)
      print sequence[injectNoiseAt]

    sequence.append(random.randrange(RANDOM_START, RANDOM_END))

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print sequence

    self.currentSequence += sequence

  def iterate(self, params, repetition, iteration):
    element = self.currentSequence.pop(0)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1)
    self.randoms.append(randomFlag)

    result = self.shifter.shift(self.model.run({"element": element}))
    tm = self.model._getTPRegion().getSelf()._tfdr
    tm.mmClearHistory()

    # use custom classifier (uses predicted cells to make predictions)
    predictiveColumns = set([tm.columnForCell(cell)
                             for cell in tm.predictiveCells])
    topPredictions = classify(self.mapping, predictiveColumns,
                              params['num_predictions'])

    truth = None if (self.randoms[-1] or
                     len(self.randoms) >= 2 and self.randoms[-2]) \
      else self.currentSequence[0]
    correct = None if truth is None else (truth in topPredictions)

    data = {"iteration": iteration,
            "current": element,
            "reset": False,
            "random": randomFlag,
            "train": True,
            "predictions": topPredictions,
            "truth": truth,
            "sequenceCounter": self.sequenceCounter}

    if params['verbosity'] > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        iteration, element, topPredictions, truth, correct)

    if len(self.currentSequence) == 0:
      self.replenish_sequence(params, iteration)
      self.sequenceCounter += 1

    return data
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'],
                                        classifyWithRandom=params['classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.numLags = params['num_lags']
    self.history = []
    self.resets = []
    self.finishInitializeX = False
    self.randoms = []
    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = initializeELMnet(params['encoding_num'] * params['num_lags'],
                                params['encoding_num'],
                                numNeurons=params['num_cells'])

    self.sequenceCounter = 0

  def window(self, data, windowSize):
    start = max(0, len(data) - windowSize)
    return data[start:]

  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(params['seed'] + iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(params['seed'] + iteration)

    if (iteration > params['inject_noise_after'] and
        iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol(seed=params['seed'] + iteration))
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target

  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct

  def killCells(self, killCellPercent):
    """
    Kill a fraction of hidden neurons from the ELM network.
    """
    if killCellPercent <= 0:
      return

    numHiddenNeurons = self.net.numHiddenNeurons
    # round() returns a float in Python 2; cast so it can index arrays
    numDead = int(round(killCellPercent * numHiddenNeurons))
    zombiePermutation = numpy.random.permutation(numHiddenNeurons)
    deadCells = zombiePermutation[0:numDead]
    liveCells = zombiePermutation[numDead:]

    self.net.inputWeights = self.net.inputWeights[liveCells, :]
    self.net.bias = self.net.bias[:, liveCells]
    self.net.beta = self.net.beta[liveCells, :]
    # numpy.ix_ keeps the full submatrix of surviving rows and columns;
    # plain M[liveCells, liveCells] would only pick paired elements
    self.net.M = self.net.M[numpy.ix_(liveCells, liveCells)]
    self.net.numHiddenNeurons = numHiddenNeurons - numDead

  def iterate(self, params, repetition, iteration):
    currentElement = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(currentElement)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')
    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    if iteration > params['train_after']:
      n = params['encoding_num']
      if self.finishInitializeX is False:
        # run initialization phase of OS-ELM
        NT = params['train_after']
        features = numpy.zeros(shape=(NT, n * params['num_lags']))
        targets = numpy.zeros(shape=(NT, n))

        history = self.window(self.history, NT)
        for i in range(params['num_lags'], NT):
          targets[i, :] = self.encoder.encode(history[i])

        for lags in xrange(params['num_lags']):
          shiftTargets = numpy.roll(targets, lags, axis=0)
          shiftTargets[:lags, :] = 0
          features[:, lags * n:(lags + 1) * n] = shiftTargets

        self.net.initializePhase(features[:, :], targets[:, :])
        self.finishInitializeX = True
      else:
        # run sequential learning phase
        targets = numpy.zeros((1, params['encoding_num']))
        targets[0, :] = self.encoder.encode(self.history[-1])

        features = numpy.zeros((1, params['encoding_num'] * params['num_lags']))
        for lags in xrange(params['num_lags']):
          features[0, lags * n:(lags + 1) * n] = self.encoder.encode(
            self.history[-1 - (lags + 1)])

        if iteration < params['stop_training_after']:
          self.net.train(features, targets)

    # run ELM on the latest data record
    n = params['encoding_num']
    currentFeatures = numpy.zeros((1, params['encoding_num'] * params['num_lags']))
    for lags in xrange(min(params['num_lags'], iteration)):
      currentFeatures[0, lags * n:(lags + 1) * n] = self.encoder.encode(
        self.history[-1 - lags])

    output = self.net.predict(currentFeatures)
    predictions = self.encoder.classify(output[0], num=params['num_predictions'])

    correct = self.check_prediction(predictions, target)

    if params['verbosity'] > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        iteration, currentElement, predictions, target, correct)

    return {"current": currentElement,
            "reset": self.resets[-1],
            "random": self.randoms[-1],
            "predictions": predictions,
            "truth": target,
            "killCell": killCell,
            "sequenceCounter": self.sequenceCounter}
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        params['encoding_num_non_random'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'])
    elif params['encoding'] == 'sparse-distributed':
      self.encoder = SparseDistributedEncoder(params['encoding_num'],
                                              params['encoding_num_non_random'],
                                              params['encoding_active_bits'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.numLags = params['num_lags']
    self.computeCounter = 0
    self.history = []
    self.resets = []
    self.finishInitializeX = False
    self.randoms = []
    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = buildNetwork(params['encoding_num'] * params['num_lags'],
                            params['num_cells'],
                            params['encoding_num'],
                            bias=True,
                            outputbias=True)

    self.sequenceCounter = 0

  def window(self, data, windowSize):
    start = max(0, len(data) - windowSize)
    return data[start:]

  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(params['seed'] + iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(params['seed'] + iteration)

    if (iteration > params['inject_noise_after'] and
        iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol(seed=params['seed'] + iteration))
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target

  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct

  def train(self, params):
    """
    Train TDNN network on buffered dataset history.
    """
    ds = SupervisedDataSet(params['encoding_num'] * params['num_lags'],
                           params['encoding_num'])
    history = self.window(self.history, params['learning_window'])

    n = params['encoding_num']
    for i in xrange(params['num_lags'], len(history)):
      targets = numpy.zeros((1, n))
      targets[0, :] = self.encoder.encode(history[i])

      features = numpy.zeros((1, n * params['num_lags']))
      for lags in xrange(params['num_lags']):
        features[0, lags * n:(lags + 1) * n] = self.encoder.encode(
          history[i - (lags + 1)])
      ds.addSample(features, targets)

    trainer = BackpropTrainer(self.net,
                              dataset=ds,
                              verbose=params['verbosity'] > 0)
    if len(history) > 1:
      trainer.trainEpochs(params['num_epochs'])

  def killCells(self, killCellPercent):
    """
    Kill a fraction of cells from the network.

    NOTE: this routine manipulates ELM-style attributes (inputWeights,
    beta, M) that a PyBrain network built with buildNetwork does not
    expose; it appears to have been carried over from the ELM suite and
    would fail if actually invoked here.
    """
    if killCellPercent <= 0:
      return

    numHiddenNeurons = self.net.numHiddenNeurons
    # round() returns a float in Python 2; cast so it can index arrays
    numDead = int(round(killCellPercent * numHiddenNeurons))
    zombiePermutation = numpy.random.permutation(numHiddenNeurons)
    deadCells = zombiePermutation[0:numDead]
    liveCells = zombiePermutation[numDead:]

    self.net.inputWeights = self.net.inputWeights[liveCells, :]
    self.net.bias = self.net.bias[:, liveCells]
    self.net.beta = self.net.beta[liveCells, :]
    self.net.M = self.net.M[numpy.ix_(liveCells, liveCells)]
    self.net.numHiddenNeurons = numHiddenNeurons - numDead

  def iterate(self, params, repetition, iteration):
    currentElement = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(currentElement)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')
    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    # reset compute counter
    if iteration > 0 and iteration % params['compute_every'] == 0:
      self.computeCounter = params['compute_for']

    if self.computeCounter == 0 or iteration < params['compute_after']:
      computeNet = False
    else:
      computeNet = True

    if computeNet:
      self.computeCounter -= 1

      train = iteration % params['compute_every'] == 0
      if train:
        if params['verbosity'] > 0:
          print "Training Network at iteration {}".format(iteration)
        self.train(params)

      if iteration > params['num_lags']:
        # run network on the latest data record
        n = params['encoding_num']
        currentFeatures = numpy.zeros(
          (params['encoding_num'] * params['num_lags'], ))
        for lags in xrange(min(params['num_lags'], iteration)):
          currentFeatures[lags * n:(lags + 1) * n] = self.encoder.encode(
            self.history[-1 - lags])

        output = self.net.activate(currentFeatures)
        predictions = self.encoder.classify(output,
                                            num=params['num_predictions'])
        correct = self.check_prediction(predictions, target)

        if params['verbosity'] > 0:
          print ("iteration: {0} \t"
                 "current: {1} \t"
                 "predictions: {2} \t"
                 "truth: {3} \t"
                 "correct: {4} \t").format(
            iteration, currentElement, predictions, target, correct)

        return {"current": currentElement,
                "reset": self.resets[-1],
                "random": self.randoms[-1],
                "predictions": predictions,
                "truth": target,
                "killCell": killCell,
                "sequenceCounter": self.sequenceCounter}
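# --- Hedged sketch (not part of the original suite) -------------------------
# The compute_every / compute_for / compute_after counters used by the LSTM
# and TDNN suites implement a duty cycle: every `compute_every` iterations
# the counter is reloaded with `compute_for`, and the network only trains
# and predicts while the counter is positive (and past the warm-up period
# `compute_after`). Traced on toy settings:
computeEvery, computeFor, computeAfter = 5, 2, 3
computeCounter = 0
for iteration in range(12):
  if iteration > 0 and iteration % computeEvery == 0:
    computeCounter = computeFor
  compute = not (computeCounter == 0 or iteration < computeAfter)
  if compute:
    computeCounter -= 1
  print iteration, compute
# the network computes at iterations 5-6 and 10-11 only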