Example #1

from pybrain.datasets import ClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork


class EightBitBrain(object):
    
    def __init__(self, dataset, inNodes, outNodes, hiddenNodes, classes):
        # Targets are one-dimensional integer class labels; nb_classes lets
        # _convertToOneOfMany() expand them into one-of-many encoding below.
        self.__dataset = ClassificationDataSet(inNodes, 1, nb_classes=classes)
        for element in dataset:
            self.addDatasetSample(self._binaryList(element[0]), element[1])
        self.__dataset._convertToOneOfMany()
        self.__network = buildNetwork(inNodes, hiddenNodes, self.__dataset.outdim, recurrent=True)
        self.__trainer = BackpropTrainer(self.__network, learningrate=0.01, momentum=0.99, verbose=True)
        self.__trainer.setData(self.__dataset)

    def _binaryList(self, n):
        # Encode an integer as the list of its eight binary digits,
        # e.g. 5 -> [0, 0, 0, 0, 0, 1, 0, 1].
        return [int(c) for c in "{0:08b}".format(n)]
    
    def addDatasetSample(self, argument, target):
        self.__dataset.addSample(argument, target)

    def train(self, epochs):
        self.__trainer.trainEpochs(epochs)
    
    def activate(self, information):
        # Feed the binary encoding through the network and return the index
        # of the strongest output unit, i.e. the predicted class.
        result = self.__network.activate(self._binaryList(information))
        highest = (0, 0)
        for resultClass in range(len(result)):
            if result[resultClass] > highest[0]:
                highest = (result[resultClass], resultClass)
        return highest[1]
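
A minimal usage sketch for the class above, assuming PyBrain is installed; the parity task and every name other than EightBitBrain are made up for illustration:

# Hypothetical example: learn the parity of 8-bit integers.
import random

training = [(n, n % 2) for n in random.sample(range(256), 64)]  # (integer, class) pairs
brain = EightBitBrain(training, inNodes=8, outNodes=2, hiddenNodes=12, classes=2)
brain.train(epochs=50)
print brain.activate(37)  # should print 1 (odd) once trained
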
Example #2

class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    print params

    self.nDimInput = 3
    self.inputEncoder = PassThroughEncoder()

    if params['output_encoding'] is None:
      self.outputEncoder = PassThroughEncoder()
      self.nDimOutput = 1
    elif params['output_encoding'] == 'likelihood':
      self.outputEncoder = ScalarBucketEncoder()
      self.nDimOutput = self.outputEncoder.encoder.n

    if (params['dataset'] == 'nyc_taxi' or
            params['dataset'] == 'nyc_taxi_perturb_baseline'):
      self.dataset = NYCTaxiDataset(params['dataset'])
    else:
      raise Exception("Dataset not found")

    self.testCounter = 0
    self.resets = []
    self.iteration = 0

    # initialize LSTM network
    random.seed(6)
    if params['output_encoding'] is None:
      self.net = buildNetwork(self.nDimInput, params['num_cells'], self.nDimOutput,
                         hiddenclass=LSTMLayer, bias=True, outputbias=True, recurrent=True)
    elif params['output_encoding'] == 'likelihood':
      self.net = buildNetwork(self.nDimInput, params['num_cells'], self.nDimOutput,
                         hiddenclass=LSTMLayer, bias=True, outclass=SigmoidLayer, recurrent=True)

    self.trainer = BackpropTrainer(self.net,
                          dataset=SequentialDataSet(self.nDimInput, self.nDimOutput),
                          learningrate=0.01,
                          momentum=0,
                          verbose=params['verbosity'] > 0)

    (self.networkInput, self.targetPrediction, self.trueData) = \
      self.dataset.generateSequence(
        prediction_nstep=params['prediction_nstep'],
        output_encoding=params['output_encoding'],
        noise=params['noise'])


  def window(self, data, params):
    start = max(0, self.iteration - params['learning_window'])
    return data[start:self.iteration]


  def train(self, params, verbose=False):

    if params['reset_every_training']:
      if verbose:
        print 'create lstm network'

      random.seed(6)
      if params['output_encoding'] is None:
        self.net = buildNetwork(self.nDimInput, params['num_cells'], self.nDimOutput,
                           hiddenclass=LSTMLayer, bias=True, outputbias=True, recurrent=True)
      elif params['output_encoding'] == 'likelihood':
        self.net = buildNetwork(self.nDimInput, params['num_cells'], self.nDimOutput,
                           hiddenclass=LSTMLayer, bias=True, outclass=SigmoidLayer, recurrent=True)

    self.net.reset()

    ds = SequentialDataSet(self.nDimInput, self.nDimOutput)
    networkInput = self.window(self.networkInput, params)
    targetPrediction = self.window(self.targetPrediction, params)

    # prepare a training data-set using the history
    for i in xrange(len(networkInput)):
      ds.addSample(self.inputEncoder.encode(networkInput[i]),
                   self.outputEncoder.encode(targetPrediction[i]))

    if params['num_epochs'] > 1:
      trainer = RPropMinusTrainer(self.net, dataset=ds, verbose=verbose)

      if verbose:
        print " train LSTM on ", len(ds), " records for ", params['num_epochs'], " epochs "

      if len(networkInput) > 1:
        trainer.trainEpochs(params['num_epochs'])

    else:
      self.trainer.setData(ds)
      self.trainer.train()

    # run through the training dataset to get the lstm network state right
    self.net.reset()
    for i in xrange(len(networkInput)):
      self.net.activate(ds.getSample(i)[0])


  def iterate(self, params, repetition, iteration, verbose=True):
    self.iteration = iteration

    if self.iteration >= len(self.networkInput):
      return None

    train = False
    if iteration > params['compute_after']:
      if iteration == params['train_at_iteration']:
        train = True

      if params['train_every_month']:
        train = (self.dataset.sequence['time'][iteration].is_month_start and
                  self.dataset.sequence['time'][iteration].hour == 0 and
                  self.dataset.sequence['time'][iteration].minute == 0)

      if params['train_every_week']:
        train = (self.dataset.sequence['time'][iteration].dayofweek==0 and
                  self.dataset.sequence['time'][iteration].hour == 0 and
                  self.dataset.sequence['time'][iteration].minute == 0)

      if params['online_training']:
        train = True
    if verbose:
      print
      print "iteration: ", iteration, " time: ", self.dataset.sequence['time'][iteration]

    if train:
      if verbose:
        print " train at", iteration, " time: ", self.dataset.sequence['time'][iteration]
      self.train(params, verbose)

    if train:
      # reset test counter after training
      self.testCounter = params['test_for']

    if self.testCounter == 0:
      return None
    else:
      self.testCounter -= 1

    symbol = self.networkInput[iteration]
    output = self.net.activate(self.inputEncoder.encode(symbol))

    if params['output_encoding'] is None:
      predictions = self.dataset.reconstructSequence(output[0])
    elif params['output_encoding'] == 'likelihood':
      predictions = list(output/sum(output))
    else:
      predictions = None

    if verbose:
      print " test at :", iteration,

    if iteration == params['perturb_after']:
      if verbose:
        print " perturb data and introduce new patterns"

      (newNetworkInput, newTargetPrediction, newTrueData) = \
        self.dataset.generateSequence(perturbed=True,
                                      prediction_nstep=params['prediction_nstep'],
                                      output_encoding=params['output_encoding'],
                                      noise=params['noise'])

      self.networkInput[iteration+1:] = newNetworkInput[iteration+1:]
      self.targetPrediction[iteration+1:] = newTargetPrediction[iteration+1:]
      self.trueData[iteration+1:] = newTrueData[iteration+1:]

    return {"current": self.networkInput[iteration],
            "reset": None,
            "train": train,
            "predictions": predictions,
            "truth": self.trueData[iteration]}
Example #3

class Suite(PyExperimentSuite):
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'],
                                        classifyWithRandom=params['classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.computeCounter = 0

    self.history = []
    self.resets = []
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = buildNetwork(params['encoding_num'], params['num_cells'],
                            params['encoding_num'],
                            hiddenclass=LSTMLayer,
                            bias=True,
                            outputbias=params['output_bias'],
                            recurrent=True)

    self.trainer = BackpropTrainer(self.net,
                          dataset=SequentialDataSet(params['encoding_num'], params['encoding_num']),
                          learningrate=0.01,
                          momentum=0,
                          verbose=params['verbosity'] > 0)


    self.sequenceCounter = 0

  def window(self, data, params):
    start = max(0, len(data) - params['learning_window'])
    return data[start:]


  def train(self, params):
    """
    Train LSTM network on buffered dataset history
    After training, run LSTM on history[:-1] to get the state correct
    :param params:
    :return:
    """
    if params['reset_every_training']:
      n = params['encoding_num']
      self.net = buildNetwork(n, params['num_cells'], n,
                               hiddenclass=LSTMLayer,
                               bias=True,
                               outputbias=params['output_bias'],
                               recurrent=True)
      self.net.reset()

    # prepare training dataset
    ds = SequentialDataSet(params['encoding_num'], params['encoding_num'])
    history = self.window(self.history, params)
    resets = self.window(self.resets, params)

    for i in xrange(1, len(history)):
      if not resets[i - 1]:
        ds.addSample(self.encoder.encode(history[i - 1]),
                     self.encoder.encode(history[i]))
      if resets[i]:
        ds.newSequence()

    print "Train LSTM network on buffered dataset of length ", len(history)
    if params['num_epochs'] > 1:
      trainer = RPropMinusTrainer(self.net,
                                  dataset=ds,
                                  verbose=params['verbosity'] > 0)

      if len(history) > 1:
        trainer.trainEpochs(params['num_epochs'])
    else:
      self.trainer.setData(ds)
      self.trainer.train()

    # run network on buffered dataset after training to get the state right
    self.net.reset()
    for i in xrange(len(history) - 1):
      symbol = history[i]
      output = self.net.activate(self.encoder.encode(symbol))
      self.encoder.classify(output, num=params['num_predictions'])

      if resets[i]:
        self.net.reset()


  def killCells(self, killCellPercent):
    """
    kill a fraction of LSTM cells from the network
    :param killCellPercent:
    :return:
    """
    if killCellPercent <= 0:
      return

    inputLayer = self.net['in']
    lstmLayer = self.net['hidden0']

    numLSTMCell = lstmLayer.outdim
    # round() returns a float, which is not a valid slice bound
    numDead = int(round(killCellPercent * numLSTMCell))
    zombiePermutation = numpy.random.permutation(numLSTMCell)
    deadCells = zombiePermutation[0:numDead]

    # remove connections from input layer to dead LSTM cells
    connectionInputToHidden = self.net.connections[inputLayer][0]
    weightInputToHidden = reshape(connectionInputToHidden.params,
                                  (connectionInputToHidden.outdim,
                                   connectionInputToHidden.indim))

    for cell in deadCells:
      # the incoming weights are stacked as four per-gate blocks,
      # each numLSTMCell rows tall
      for dim in range(4):
        weightInputToHidden[dim * numLSTMCell + cell, :] *= 0

    newParams = reshape(weightInputToHidden,
                        (connectionInputToHidden.paramdim,))
    self.net.connections[inputLayer][0]._setParameters(
      newParams, connectionInputToHidden.owner)

    # remove dead connections within LSTM layer
    connectionHiddenToHidden = self.net.recurrentConns[0]
    weightHiddenToHidden = reshape(connectionHiddenToHidden.params,
                                   (connectionHiddenToHidden.outdim,
                                    connectionHiddenToHidden.indim))

    for cell in deadCells:
      weightHiddenToHidden[:, cell] *= 0

    newParams = reshape(weightHiddenToHidden,
                        (connectionHiddenToHidden.paramdim,))
    self.net.recurrentConns[0]._setParameters(
      newParams, connectionHiddenToHidden.owner)

    # remove connections from dead LSTM cell to output layer
    connectionHiddenToOutput = self.net.connections[lstmLayer][0]
    weightHiddenToOutput = reshape(connectionHiddenToOutput.params,
                                   (connectionHiddenToOutput.outdim,
                                    connectionHiddenToOutput.indim))
    for cell in deadCells:
      weightHiddenToOutput[:, cell] *= 0

    newParams = reshape(weightHiddenToOutput,
                        (connectionHiddenToOutput.paramdim,))
    self.net.connections[lstmLayer][0]._setParameters(
      newParams, connectionHiddenToOutput.owner)


  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(params['seed']+iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(params['seed']+iteration)

    if (iteration > params['inject_noise_after'] and
            iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol(seed=params['seed']+iteration))
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target


  def check_prediction(self, topPredictions, targets):
    if targets is None:
      return None
    if isinstance(targets, numbers.Number):
      return targets in topPredictions
    return all(prediction in targets for prediction in topPredictions)


  def iterate(self, params, repetition, iteration):
    currentElement = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(currentElement)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')

    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    # reset compute counter
    if iteration > 0 and iteration % params['compute_every'] == 0:
      self.computeCounter = params['compute_for']

    if self.computeCounter == 0 or iteration < params['compute_after']:
      computeLSTM = False
    else:
      computeLSTM = True

    if computeLSTM:
      self.computeCounter -= 1

      train = (not params['compute_test_mode'] or
               iteration % params['compute_every'] == 0)

      if train:
        if params['verbosity'] > 0:
          print "Training LSTM at iteration {}".format(iteration)

        self.train(params)

      # run LSTM on the latest data record

      output = self.net.activate(self.encoder.encode(currentElement))
      # BasicEncoder and DistributedEncoder expose the same classify() interface
      predictions = self.encoder.classify(output, num=params['num_predictions'])

      correct = self.check_prediction(predictions, target)

      if params['verbosity'] > 0:
        print ("iteration: {0} \t"
               "current: {1} \t"
               "predictions: {2} \t"
               "truth: {3} \t"
               "correct: {4} \t").format(
          iteration, currentElement, predictions, target, correct)

      if self.resets[-1]:
        if params['verbosity'] > 0:
          print "Reset LSTM at iteration {}".format(iteration)
        self.net.reset()

      return {"iteration": iteration,
              "current": currentElement,
              "reset": self.resets[-1],
              "random": self.randoms[-1],
              "train": train,
              "predictions": predictions,
              "truth": target,
              "killCell": killCell,
              "sequenceCounter": self.sequenceCounter}