def run_HTM_false_positive_experiment_synapses(num_neurons = 1,
                                               a = 512,
                                               dim = 16000,
                                               num_samples = 1000,
                                               num_dendrites = 500,
                                               test_dendrite_lengths = range(2, 32, 2),
                                               num_trials = 1000):
  """
  Run an experiment to test the false positive rate based on number of
  synapses per dendrite, dimension and sparsity.  Uses a single neuron,
  with a threshold nonlinearity of theta = s/2.

  Based on figure 5B in the original SDR paper.

  The parameters used in generating the figure for this experiment are:
  1.  a = 512, dim = 16000
  2.  a = 4000, dim = 16000
  3.  a = 32, dim = 2000
  4.  a = 32, dim = 4000
  In each case, we used 1000 samples per trial, 1000 trials, 500 dendrite
  segments, and tested dendrite lengths in (2, 4, ..., 30), with the threshold
  for each segment being half its length.
  """
  for dendrite_length in test_dendrite_lengths:
    nonlinearity = threshold_nonlinearity(dendrite_length / 2)

    fps = []
    fns = []

    for trial in range(num_trials):

      neuron = Neuron(size = dendrite_length * num_dendrites,
                      num_dendrites = num_dendrites,
                      dendrite_length = dendrite_length,
                      dim = dim,
                      nonlinearity = nonlinearity)
      pos = generate_evenly_distributed_data_sparse(dim = dim,
                                                    num_active = a,
                                                    num_samples = num_samples / 2)
      neg = generate_evenly_distributed_data_sparse(dim = dim,
                                                    num_active = a,
                                                    num_samples = num_samples / 2)
      #labels = numpy.asarray([1 for i in range(num_samples/2)] + [-1 for i in range(num_samples/2)])

      neuron.HTM_style_initialize_on_data(pos,
          numpy.asarray([1 for i in range(num_samples / 2)]))

      error, fp, fn = get_error(neg,
                                [-1 for i in range(num_samples / 2)],
                                [neuron])

      fps.append(fp)
      fns.append(fn)
      print "Error at {} synapses per dendrite is {}, with {} false positives and {} false negatives".format(dendrite_length, fp / (num_samples / 2.), fp, fn)

    with open("num_dendrites_FP_{}_{}.txt".format(a, dim), "a") as f:
      f.write(str(dendrite_length) + ", " + str(sum(fps)) + ", " + str(num_trials * num_samples / 2.) + "\n")
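
# Illustrative usage sketch (not part of the original module): sweep the four
# (a, dim) settings listed in the docstring above.  Assumes this module's
# dependencies (Neuron, threshold_nonlinearity,
# generate_evenly_distributed_data_sparse, get_error, numpy) are importable;
# each run appends its counts to num_dendrites_FP_{a}_{dim}.txt.
def sweep_HTM_false_positive_settings():
  for a, dim in [(512, 16000), (4000, 16000), (32, 2000), (32, 4000)]:
    run_HTM_false_positive_experiment_synapses(a=a, dim=dim)
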
def run_false_positive_experiment_dim(num_neurons = 1,
                                      num_neg_neurons = 1,
                                      a = 128,
                                      test_dims = range(1100, 2100, 200),
                                      num_samples = 1000,
                                      num_dendrites = 500,
                                      dendrite_length = 24,
                                      num_trials = 10000,
                                      nonlinearity = sigmoid_nonlinearity(11.5, 5)):
  """
  Run an experiment to test the false positive rate based on number of
  synapses per dendrite, dimension and sparsity.  Uses two competing neurons,
  following the P&M model.

  Based on figure 5B in the original SDR paper.
  """
  for dim in test_dims:

    fps = []
    fns = []

    for trial in range(num_trials):

      neuron = Neuron(size = dendrite_length * num_dendrites,
                      num_dendrites = num_dendrites,
                      dendrite_length = dendrite_length,
                      dim = dim,
                      nonlinearity = nonlinearity)
      neg_neuron = Neuron(size = dendrite_length * num_dendrites,
                          num_dendrites = num_dendrites,
                          dendrite_length = dendrite_length,
                          dim = dim,
                          nonlinearity = nonlinearity)
      data = generate_evenly_distributed_data_sparse(dim = dim,
                                                     num_active = a,
                                                     num_samples = num_samples)
      labels = numpy.asarray([1 for i in range(num_samples / 2)] +
                             [-1 for i in range(num_samples / 2)])
      flipped_labels = labels * -1

      neuron.HTM_style_initialize_on_data(data, labels)
      neg_neuron.HTM_style_initialize_on_data(data, flipped_labels)

      error, fp, fn, uc = get_error(data, labels, [neuron], [neg_neuron],
                                    add_noise = True)

      fps.append(fp)
      fns.append(fn)
      print "Error at n = {} is {}, with {} false positives and {} false negatives, with {} unclassified".format(dim, error, fp, fn, uc)

    with open("pm_dim_FP_{}.txt".format(a), "a") as f:
      f.write(str(dim) + ", " + str(sum(fns + fps)) + ", " + str(num_trials * num_samples) + "\n")
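
# Illustrative usage sketch (not part of the original module): run the
# dimension sweep with the default sparsity (a = 128) and dimensions
# 1100..1900.  Results are appended to pm_dim_FP_{a}.txt.
def sweep_pm_dim_false_positives():
  run_false_positive_experiment_dim(a=128, test_dims=range(1100, 2100, 200))
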
def run_noise_experiment(num_neurons = 1,
             a = 128,
             dim = 6000,
             test_noise_levels = range(15, 100, 5),
             num_samples = 500,
             num_dendrites = 500,
             dendrite_length = 30,
             theta = 8,
             num_trials = 100):
  """
  Tests the impact of noise on a neuron, using an HTM approach to a P&M
  model of a neuron.  Nonlinearity is a simple threshold at theta, as in the
  original version of this experiment, and each dendrite is bound by the
  initialization to a single pattern.  Only one neuron is used, unlike in the
  P&M classification experiment, and a successful identification is simply
  defined as at least one dendrite having theta active synapses.

  Training is done via HTM-style initialization.  In the event that the init
  fails to produce an error rate of 0 without noise (which anecdotally never
  occurs), we simply reinitialize.

  Results are saved to the file noise_FN_{theta}.txt.

  This corresponds to the false negative vs. noise level figure in the paper.
  To generate the results shown, we used theta = 8, theta = 12 and theta = 16,
  with noise levels in range(15, 85, 5), 500 dendrites and 30 synapses per
  dendrite.  We generated 500 sample SDRs, one per dendrite, and ran 100 trials
  at each noise level.  Each SDR had a = 128, dim = 6000.
  """

  nonlinearity = threshold_nonlinearity(theta)
  for noise in test_noise_levels:

    fps = []
    fns = []

    for trial in range(num_trials):

      successful_initialization = False
      while not successful_initialization:
        neuron = Neuron(size = dendrite_length*num_dendrites, num_dendrites = num_dendrites, dendrite_length = dendrite_length, dim = dim, nonlinearity = nonlinearity)
        data = generate_evenly_distributed_data_sparse(dim = dim, num_active = a, num_samples = num_samples)
        labels = [1 for i in range(num_samples)]

        neuron.HTM_style_initialize_on_data(data, labels)

        error, fp, fn = get_error(data, labels, [neuron])
        print "Initialization error is {}, with {} false positives and {} false negatives".format(error, fp, fn)
        if error == 0:
          successful_initialization = True
        else:
          print "Repeating to get a successful initialization"

      apply_noise(data, noise)
      error, fp, fn = get_error(data, labels, [neuron])
      fps.append(fp)
      fns.append(fn)
      print "Error at noise {} is {}, with {} false positives and {} false negatives".format(noise, error, fp, fn)

    with open("noise_FN_{}.txt".format(theta), "a") as f:
      f.write(str(noise) + ", " + str(numpy.sum(fns)) + ", " + str(num_trials*num_samples) + "\n")
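
# Illustrative usage sketch (not part of the original module): the settings
# described in the docstring above, theta in {8, 12, 16} with noise levels
# range(15, 85, 5).  Each run appends its false negative counts to
# noise_FN_{theta}.txt.
def sweep_noise_false_negatives():
  for theta in (8, 12, 16):
    run_noise_experiment(theta=theta, test_noise_levels=range(15, 85, 5))
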
def run_tm_noise_experiment(dim = 2048,
                            cellsPerColumn=1,
                            num_active = 40,
                            activationThreshold=16,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=16,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_samples = 1,
                            num_trials = 1000,
                            sequence_length = 20,
                            training_iters = 1,
                            automatic_threshold = False,
                            noise_range = range(0, 100, 5)):

  """
  Run an experiment tracking the performance of the temporal memory given
  noise.  The number of active cells and the dimensions of the TM are
  fixed. We track performance by comparing the cells predicted to be
  active with the cells actually active in the sequence without noise at
  every timestep, and averaging across timesteps. Three metrics are used,
  correlation (Pearson's r, by numpy.corrcoef), set similarity (Jaccard
  index) and cosine similarity (using scipy.spatial.distance.cosine). The
  Jaccard set similarity is the canonical metric used in the paper, but
  all three metrics tend to produce very similar results.

  Typically, this experiment is run to test the influence of activation
  threshold on noise tolerance, with multiple different thresholds tested.
  However, this experiment could also be used to examine the influence of
  factors such as sparsity and sequence length.

  Output is written to tm_noise_{threshold}.txt, including sample size.

  We used three different activation threshold settings, 8, 12 and 16, mirroring
  the parameters used in the Poirazi neuron model experiment.
  """
  if automatic_threshold:
    activationThreshold = min(num_active/2, maxNewSynapseCount/2)
    minThreshold = min(num_active/2, maxNewSynapseCount/2)

  for noise in noise_range:
    print noise
    # Accumulate the per-sequence metrics across every trial at this noise
    # level; their means are written out after the trial loop.
    correlations = []
    similarities = []
    csims = []
    for trial in range(num_trials):
      tm = TM(columnDimensions=(dim,),
          cellsPerColumn=cellsPerColumn,
          activationThreshold=activationThreshold,
          initialPermanence=initialPermanence,
          connectedPermanence=connectedPermanence,
          minThreshold=minThreshold,
          maxNewSynapseCount=maxNewSynapseCount,
          permanenceIncrement=permanenceIncrement,
          permanenceDecrement=permanenceDecrement,
          predictedSegmentDecrement=predictedSegmentDecrement,
          maxSegmentsPerCell=maxSegmentsPerCell,
          maxSynapsesPerSegment=maxSynapsesPerSegment,
          )#seed=seed)

      datapoints = []
      canonical_active_cells = []

      for sample in range(num_samples):
        data = generate_evenly_distributed_data_sparse(dim = dim, num_active = num_active, num_samples = sequence_length)
        datapoints.append(data)
        for i in range(training_iters):
          for j in range(data.nRows()):
            activeColumns = set(data.rowNonZeros(j)[0])
            tm.compute(activeColumns, learn = True)
          tm.reset()

        current_active_cells = []
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn = True)
          current_active_cells.append(tm.getActiveCells())
        canonical_active_cells.append(current_active_cells)
        tm.reset()

      # Now that the TM has been trained, check its performance on each
      # sequence with noise added.
      for datapoint, active_cells in zip(datapoints, canonical_active_cells):
        data = copy.deepcopy(datapoint)
        apply_noise(data, noise)

        predicted_cells = []

        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn = False)
          predicted_cells.append(tm.getPredictiveCells())

        similarity = [(0.+len(set(predicted) & set(active)))/len((set(predicted) | set(active))) for predicted, active in zip (predicted_cells[:-1], active_cells[1:])]
        # Dense vectors cover all cells in the TM (dim * cellsPerColumn).
        dense_predicted_cells = convert_cell_lists_to_dense(dim*cellsPerColumn, predicted_cells[:-1])
        dense_active_cells = convert_cell_lists_to_dense(dim*cellsPerColumn, active_cells[1:])
        correlation = [numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1] for predicted, active in zip(dense_predicted_cells, dense_active_cells)]
        csim = [1 - cosine(predicted, active) for predicted, active in zip(dense_predicted_cells, dense_active_cells)]

        correlation = numpy.nan_to_num(correlation)
        csim = numpy.nan_to_num(csim)
        correlations.append(numpy.mean(correlation))
        similarities.append(numpy.mean(similarity))
        csims.append(numpy.mean(csim))

    correlation = numpy.mean(correlations)
    similarity = numpy.mean(similarities)
    csim = numpy.mean(csims)
    with open("tm_noise_{}.txt".format(activationThreshold), "a") as f:
      f.write(str(noise)+", " + str(correlation) + ", " + str(similarity) + ", " + str(csim) + ", " + str(num_trials) + "\n")
def run_tm_union_experiment(dim = 4000,
                            cellsPerColumn=1,
                            num_active = 40,
                            activationThreshold=5,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=5,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_branches_range = range(50, 51, 1),
                            onset_length = 5,
                            training_iters = 10,
                            num_trials = 10000,
                            automatic_threshold = True,
                            save_results = True):
  """
  Run an experiment tracking the performance of the temporal memory given
  different input dimensions.  The number of active cells is kept fixed, so we
  are in effect varying the sparsity of the input.   We track performance by
  comparing the cells predicted to be active with the cells actually active in
  the sequence without noise at every timestep, and averaging across timesteps.
  Three metrics are used, correlation (Pearson's r, by numpy.corrcoef),
  set similarity (Jaccard index) and cosine similarity (using
  scipy.spatial.distance.cosine).  The Jaccard set similarity is the
  canonical metric used in the paper, but all three tend to produce very similar
  results.

  Output is written to tm_dim_{num_active}.txt, including sample size.

  We tested two different dimension settings, 2000 and 4000.
  """
  if automatic_threshold:
    activationThreshold = min(num_active/2, maxNewSynapseCount/2)
    minThreshold = min(num_active/2, maxNewSynapseCount/2)

  for num_branches in num_branches_range:
    overlaps = []
    surprises = []
    csims = []
    for trial in range(num_trials):
      if (trial + 1) % 100 == 0:
        print trial + 1
      tm = TM(columnDimensions=(dim,),
              cellsPerColumn=cellsPerColumn,
              activationThreshold=activationThreshold,
              initialPermanence=initialPermanence,
              connectedPermanence=connectedPermanence,
              minThreshold=minThreshold,
              maxNewSynapseCount=maxNewSynapseCount,
              permanenceIncrement=permanenceIncrement,
              permanenceDecrement=permanenceDecrement,
              predictedSegmentDecrement=predictedSegmentDecrement,
              maxSegmentsPerCell=maxSegmentsPerCell,
              maxSynapsesPerSegment=maxSynapsesPerSegment,
              seed=seed)

      datapoints = []
      canonical_active_cells = []
      onset = generate_evenly_distributed_data_sparse(dim = dim, num_active = num_active, num_samples = onset_length)

      for branch in range(num_branches):
        datapoint = numpy.random.choice(dim, num_active, replace = False)
        datapoints.append(datapoint)
        for i in range(training_iters):
          for j in range(onset.nRows()):
            activeColumns = set(onset.rowNonZeros(j)[0])
            tm.compute(activeColumns, learn = True)
          tm.compute(datapoint, learn=True)
          tm.reset()

      for j in range(onset.nRows()):
        activeColumns = set(onset.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn = False)
      predicted_cells = tm.getPredictiveCells()

      datapoint = numpy.random.choice(dim, num_active, replace = False)
      overlap = (1. * len(set(predicted_cells) & set(datapoint)))/len(datapoint)
      surprise = len(datapoint) - len(set(predicted_cells) & set(datapoint))
      dense_predicted_cells = numpy.zeros((dim*cellsPerColumn,))
      for cell in predicted_cells:
        dense_predicted_cells[cell] = 1.
      dense_active_cells = numpy.zeros((dim*cellsPerColumn,))
      for cell in datapoint:
        dense_active_cells[cell] = 1.
      csim = 1 - cosine(dense_predicted_cells, dense_active_cells)
      csim = numpy.nan_to_num(csim)
      overlaps.append(overlap)
      surprises.append(surprise)
      csims.append(csim)

    overlap = numpy.mean(overlaps)
    surprise = numpy.mean(surprises)
    csim = numpy.mean(csims)
    print dim, overlap, surprise, csim
    if save_results:
      with open("tm_union_n{}_a{}_c{}.txt".format(dim, num_active, cellsPerColumn), "a") as f:
        f.write(str(num_branches)+", " + str(overlap) + ", " + str(surprise) + ", " + str(csim) + ", " + str(num_trials) + "\n")
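
# Illustrative usage sketch (not part of the original module): sweep the
# number of learned branches to see how overlap and surprise change as the
# union of predictions grows.  The branch range here is a hypothetical choice;
# the defaults test a single value (50).  Results are appended to
# tm_union_n{dim}_a{num_active}_c{cellsPerColumn}.txt.
def sweep_tm_union_branches():
  run_tm_union_experiment(dim=4000, num_branches_range=range(10, 110, 10))
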
def run_tm_dim_experiment(test_dims = range(300, 3100, 100),
                          cellsPerColumn=1,
                          num_active = 256,
                          activationThreshold=10,
                          initialPermanence=0.8,
                          connectedPermanence=0.50,
                          minThreshold=10,
                          maxNewSynapseCount=20,
                          permanenceIncrement=0.05,
                          permanenceDecrement=0.00,
                          predictedSegmentDecrement=0.000,
                          maxSegmentsPerCell=4000,
                          maxSynapsesPerSegment=255,
                          seed=42,
                          num_samples = 1000,
                          sequence_length = 20,
                          training_iters = 1,
                          automatic_threshold = False,
                          save_results = True):
  """
  Run an experiment tracking the performance of the temporal memory given
  different input dimensions.  The number of active cells is kept fixed, so we
  are in effect varying the sparsity of the input.   We track performance by
  comparing the cells predicted to be active with the cells actually active in
  the sequence without noise at every timestep, and averaging across timesteps.
  Three metrics are used, correlation (Pearson's r, by numpy.corrcoef),
  set similarity (Jaccard index) and cosine similarity (using
  scipy.spatial.distance.cosine).  The Jaccard set similarity is the
  canonical metric used in the paper, but all three tend to produce very similar
  results.

  Output is written to tm_dim_{num_active}.txt, including sample size.

  In our experiments, we used the set similarity metric (third column in output)
  along with three different values for num_active, 64, 128 and 256.  We used
  dimensions from 300 to 2900 in each case, testing every 100.  1000 sequences
  of length 20 were passed to the TM in each trial.
  """
  if automatic_threshold:
    activationThreshold = min(num_active/2, maxNewSynapseCount/2)
    minThreshold = min(num_active/2, maxNewSynapseCount/2)
    print "Using activation threshold {}".format(activationThreshold)

  for dim in test_dims:
    tm = TM(columnDimensions=(dim,),
            cellsPerColumn=cellsPerColumn,
            activationThreshold=activationThreshold,
            initialPermanence=initialPermanence,
            connectedPermanence=connectedPermanence,
            minThreshold=minThreshold,
            maxNewSynapseCount=maxNewSynapseCount,
            permanenceIncrement=permanenceIncrement,
            permanenceDecrement=permanenceDecrement,
            predictedSegmentDecrement=predictedSegmentDecrement,
            maxSegmentsPerCell=maxSegmentsPerCell,
            maxSynapsesPerSegment=maxSynapsesPerSegment,
            seed=seed)

    tm.setMinThreshold(1000)

    datapoints = []
    canonical_active_cells = []

    for sample in range(num_samples):
      if (sample + 1) % 10 == 0:
        print sample + 1
      data = generate_evenly_distributed_data_sparse(dim = dim, num_active = num_active, num_samples = sequence_length)
      datapoints.append(data)
      for i in range(training_iters):
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn = True)
        tm.reset()

      current_active_cells = []
      for j in range(data.nRows()):
        activeColumns = set(data.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn = True)
        current_active_cells.append(tm.getActiveCells())
      canonical_active_cells.append(current_active_cells)
      tm.reset()

    # Now that the TM has been trained, check its performance on each sequence.
    correlations = []
    similarities = []
    csims = []
    for datapoint, active_cells in zip(datapoints, canonical_active_cells):
      data = copy.deepcopy(datapoint)
      predicted_cells = []

      for j in range(data.nRows()):
        activeColumns = set(data.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn = False)
        predicted_cells.append(tm.getPredictiveCells())
      tm.reset()

      similarity = [(0.+len(set(predicted) & set(active)))/len((set(predicted) | set(active))) for predicted, active in zip (predicted_cells[:-1], active_cells[1:])]
      dense_predicted_cells = convert_cell_lists_to_dense(dim*cellsPerColumn, predicted_cells[:-1])
      dense_active_cells = convert_cell_lists_to_dense(dim*cellsPerColumn, active_cells[1:])

      correlation = [numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1] for predicted, active in zip(dense_predicted_cells, dense_active_cells)]

      csim = [1 - cosine(predicted, active) for predicted, active in zip(dense_predicted_cells, dense_active_cells)]

      correlation = numpy.nan_to_num(correlation)
      csim = numpy.nan_to_num(csim)
      correlations.append(numpy.mean(correlation))
      similarities.append(numpy.mean(similarity))
      csims.append(numpy.mean(csim))



    correlation = numpy.mean(correlations)
    similarity = numpy.mean(similarities)
    csim = numpy.mean(csims)
    print dim, correlation, similarity, csim
    if save_results:
        with open("tm_dim_{}.txt".format(num_active), "a") as f:
          f.write(str(dim)+", " + str(correlation) + ", " + str(similarity) + ", " + str(csim) + ", " + str(num_samples) + "\n")
def run_false_positive_experiment_dim(numActive=128,
                                      dim=500,
                                      numSamples=1000,
                                      numDendrites=500,
                                      synapses=24,
                                      numTrials=10000,
                                      seed=42,
                                      nonlinearity=sigmoid_nonlinearity(
                                          11.5, 5)):
    """
  Run an experiment to test the false positive rate based on number of synapses
  per dendrite, dimension and sparsity.  Uses two competing neurons, following
  the P&M model.

  Based on figure 5B in the original SDR paper.
  """
    numpy.random.seed(seed)
    fps = []
    fns = []
    totalUnclassified = 0

    for trial in range(numTrials):

        # data = generate_evenly_distributed_data_sparse(dim = dim,
        #                                                num_active = numActive,
        #                                                num_samples = numSamples)
        # labels = numpy.asarray([1 for i in range(numSamples / 2)] +
        #                        [-1 for i in range(numSamples / 2)])
        # flipped_labels = labels * -1

        negData = generate_evenly_distributed_data_sparse(
            dim=dim, num_active=numActive, num_samples=numSamples / 2)
        posData = generate_evenly_distributed_data_sparse(
            dim=dim, num_active=numActive, num_samples=numSamples / 2)
        halfLabels = numpy.asarray([1 for _ in range(numSamples / 2)])
        flippedHalfLabels = halfLabels * -1

        neuron = Neuron(size=synapses * numDendrites,
                        num_dendrites=numDendrites,
                        dendrite_length=synapses,
                        dim=dim,
                        nonlinearity=nonlinearity)
        neg_neuron = Neuron(size=synapses * numDendrites,
                            num_dendrites=numDendrites,
                            dendrite_length=synapses,
                            dim=dim,
                            nonlinearity=nonlinearity)

        neuron.HTM_style_initialize_on_positive_data(posData)
        neg_neuron.HTM_style_initialize_on_positive_data(negData)

        # Get error for positively labeled data
        fp, fn, uc = get_error(posData, halfLabels, [neuron], [neg_neuron])
        totalUnclassified += uc
        fps.append(fp)
        fns.append(fn)

        # Get error for negatively labeled data
        fp, fn, uc = get_error(negData, flippedHalfLabels, [neuron],
                               [neg_neuron])
        totalUnclassified += uc
        fps.append(fp)
        fns.append(fn)

    print "Error with n = {} : {} FP, {} FN, {} unclassified".format(
        dim, sum(fps), sum(fns), totalUnclassified)

    result = {
        "dim": dim,
        "totalFP": sum(fps),
        "totalFN": sum(fns),
        "total mistakes": sum(fns + fps) + totalUnclassified,
        "error": float(sum(fns + fps) + totalUnclassified) / (numTrials * numSamples),
        "totalSamples": numTrials * numSamples,
        "a": numActive,
        "num_dendrites": numDendrites,
        "totalUnclassified": totalUnclassified,
        "synapses": synapses,
        "seed": seed,
    }

    return result
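
# Illustrative usage sketch (not part of the original module): this variant
# returns a result dict instead of writing to a file, so a dimension sweep can
# be collected in memory.  The dim values below are a hypothetical choice.
def sweep_fp_dims_collect():
    return [run_false_positive_experiment_dim(numActive=128, dim=dim)
            for dim in range(500, 2100, 200)]
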