Example #1
def load_sdrs(file_path):
    traces = loadTraces(file_path)
    num_records = len(traces['sensorValue'])
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 1
    start_idx = 1000
    end_idx = -1
    if start_idx < 0:
        start = num_records + start_idx
    else:
        start = start_idx
    if end_idx < 0:
        end = num_records + end_idx
    else:
        end = end_idx

    categories = traces['actualCategory'][start:end]
    active_cells = traces['tmActiveCells'][start:end]
    predicted_active_cells = traces['tmPredictedActiveCells'][start:end]
    # generate sdrs to cluster
    active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (float(active_cells_weight) * np.array(active_cells_sdrs) +
            float(predicted_active_cells_weight) * predicted_active_cells_sdrs)

    return sdrs, categories
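The helper convert_to_sdrs is used in every example on this page but is not shown. Below is a minimal sketch of what it presumably does, assuming each trace entry is a list of active cell indices that gets expanded into a dense binary vector of length input_width; the name convert_to_sdrs_sketch and the dtype are illustrative, not the confirmed implementation.

import numpy as np

def convert_to_sdrs_sketch(index_lists, input_width):
    """Expand lists of active cell indices into dense binary SDR vectors."""
    sdrs = []
    for active_indices in index_lists:
        sdr = np.zeros(input_width, dtype=np.int8)
        for i in active_indices:
            sdr[i] = 1  # mark each active cell
        sdrs.append(sdr)
    return sdrs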
Example #2
def load_sdrs(file_path):
  traces = loadTraces(file_path)
  num_records = len(traces['sensorValue'])
  input_width = 2048 * 32
  active_cells_weight = 0
  predicted_active_cells_weight = 1
  start_idx = 1000
  end_idx = -1
  if start_idx < 0:
    start = num_records + start_idx
  else:
    start = start_idx
  if end_idx < 0:
    end = num_records + end_idx
  else:
    end = end_idx

  categories = traces['actualCategory'][start:end]
  active_cells = traces['tmActiveCells'][start:end]
  predicted_active_cells = traces['tmPredictedActiveCells'][start:end]
  # generate sdrs to cluster
  active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
  predicted_active_cells_sdrs = np.array(
    convert_to_sdrs(predicted_active_cells, input_width))
  sdrs = (float(active_cells_weight) * np.array(active_cells_sdrs) +
          float(predicted_active_cells_weight) * predicted_active_cells_sdrs)

  return sdrs, categories
Example #3
def load_sdrs(start_idx, end_idx, exp_name):
  # Params
  input_width = 2048 * 32
  active_cells_weight = 0
  predicted_active_cells_weight = 1
  network_config = 'sp=True_tm=True_tp=False_SDRClassifier'

  # load traces
  file_name = get_file_name(exp_name, network_config)
  traces = loadTraces(file_name)
  num_records = len(traces['sensorValue'])

  # start and end 
  if start_idx < 0:
    start = num_records + start_idx
  else:
    start = start_idx
  if end_idx < 0:
    end = num_records + end_idx
  else:
    end = end_idx

  # input data
  sensor_values = traces['sensorValue'][start:end]
  categories = traces['actualCategory'][start:end]
  active_cells = traces['tmActiveCells'][start:end]
  predicted_active_cells = traces['tmPredictedActiveCells'][start:end]

  # generate sdrs to cluster
  active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
  predicted_active_cells_sdrs = np.array(
    convert_to_sdrs(predicted_active_cells, input_width))
  sdrs = (float(active_cells_weight) * np.array(active_cells_sdrs) +
          float(predicted_active_cells_weight) * predicted_active_cells_sdrs)

  return sdrs, categories
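get_file_name is not defined in this example. Below is a plausible sketch, assuming the trace CSV name combines the experiment name with the network configuration string and lives in an htm/traces directory, by analogy with the explicit 'trace_%s.csv' path built in Example #7; the file-name pattern and directory layout are assumptions.

import os

def get_file_name_sketch(exp_name, network_config):
  # Assumed layout: ../htm/traces/trace_<exp_name>_<network_config>.csv
  trace_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           os.pardir, 'htm', 'traces')
  return os.path.join(trace_dir,
                      'trace_%s_%s.csv' % (exp_name, network_config))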
Example #4
def load_sdrs(start_idx, end_idx, exp_name):
    # Params
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 1
    network_config = 'sp=True_tm=True_tp=False_SDRClassifier'

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    num_records = len(traces['sensorValue'])

    # start and end
    if start_idx < 0:
        start = num_records + start_idx
    else:
        start = start_idx
    if end_idx < 0:
        end = num_records + end_idx
    else:
        end = end_idx

    # input data
    sensor_values = traces['sensorValue'][start:end]
    categories = traces['actualCategory'][start:end]
    active_cells = traces['tmActiveCells'][start:end]
    predicted_active_cells = traces['tmPredictedActiveCells'][start:end]

    # generate sdrs to cluster
    active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (float(active_cells_weight) * np.array(active_cells_sdrs) +
            float(predicted_active_cells_weight) * predicted_active_cells_sdrs)

    return sdrs, categories
Example #5
                    dest="numTmCells",
                    help="Number of cells in the Temporal Memory")

  (options, remainder) = parser.parse_args()
  return options, remainder



if __name__ == "__main__":

  (_options, _args) = _getArgs()
  inputFile = _options.fileName

  plotTemporalMemoryStates = _options.plotTemporalMemoryStates

  if _options.xl:
    xl = [int(x) for x in _options.xl.split(',')]
  else:
    xl = _options.xl

  print(inputFile)
  traces = loadTraces(inputFile)

  numTmCells = _options.numTmCells

  title = inputFile.split('/')[-1]
  outputFile = '%s.png' % inputFile[:-4]
  plt = plotTraces(xl, traces, title, ANOMALY_SCORE, outputFile, CLUSTERING,
                   numTmCells, plotTemporalMemoryStates)
  plt.show()
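The top of this snippet is cut off, so only the tail of the _getArgs option parser is visible. Below is a hedged reconstruction, assuming optparse (parse_args() returns an (options, args) pair) and taking the option names from how _options is used in the snippet; the defaults other than numTmCells are guesses.

from optparse import OptionParser

def _getArgs_sketch():
  parser = OptionParser(usage="%prog -f FILE [options]")
  parser.add_option("-f", "--fileName",
                    dest="fileName",
                    help="Path to the trace CSV file")
  parser.add_option("--xl",
                    dest="xl",
                    default=None,
                    help="Comma-separated x-axis limits, e.g. '0,1000'")
  parser.add_option("--plotTemporalMemoryStates",
                    action="store_true",
                    dest="plotTemporalMemoryStates",
                    default=False,
                    help="Whether to plot Temporal Memory cell states")
  parser.add_option("--numTmCells",
                    type="int",
                    default=32 * 2048,
                    dest="numTmCells",
                    help="Number of cells in the Temporal Memory")
  (options, remainder) = parser.parse_args()
  return options, remainder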
Example #6
def meanInClusterDistances(cluster):
  overlaps = []
  perms = list(permutations(cluster, 2))
  for (sdr1, sdr2) in perms:
    overlap = percentOverlap(sdr1, sdr2)
    overlaps.append(overlap)
  return sum(overlaps) / len(overlaps)



if __name__ == "__main__":
  (_options, _args) = _getArgs()

  fileName = _options.fileName

  traces = loadTraces(fileName)
  outputDir = fileName[:-4]
  if not os.path.exists(outputDir):
    os.makedirs(outputDir)
  cellsType = CELLS_TO_CLUSTER
  numCells = 2048 * 32
  numSteps = len(traces['recordNumber'])
  pointsToPlot = numSteps // 10
  numClasses = len(set(traces['actualCategory']))
  vizInterCategoryClusters(traces,
                           outputDir,
                           cellsType,
                           numCells,
                           pointsToPlot)

  vizInterSequenceClusters(traces, outputDir, cellsType, numCells,
                           numClasses)  # final argument assumed; numClasses is otherwise unused
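percentOverlap, averaged by meanInClusterDistances above, is not shown here. A minimal sketch, assuming binary SDR vectors and normalization by the smaller number of active bits; the actual helper may normalize differently (e.g. by a fixed number of active bits).

import numpy as np

def percent_overlap_sketch(sdr1, sdr2):
  # Fraction of active bits shared by the two SDRs, relative to the
  # sparser of the two; identical non-empty SDRs score 1.0.
  sdr1 = np.asarray(sdr1)
  sdr2 = np.asarray(sdr2)
  shared = np.count_nonzero(np.logical_and(sdr1, sdr2))
  min_active = min(np.count_nonzero(sdr1), np.count_nonzero(sdr2))
  if min_active == 0:
    return 0.0
  return float(shared) / min_active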
Example #7
def main():
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClusteringV2]

    # Exp params
    moving_average_window = 2  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[1]
    distance_func = distance_functions[0]
    merge_threshold = 30  # Cutoff distance to merge clusters. 'None' to ignore.
    start_idx = 0
    end_idx = -1
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 10
    max_num_clusters = 3
    num_cluster_snapshots = 1
    show_plots = True
    distance_matrix_ignore_noise = False  # ignore label 0 if used to label noise.
    exp_name = 'body_acc_x_inertial_signals_train'

    # Clean and create output directory for the graphs
    plots_output_dir = 'plots/%s' % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             os.pardir, 'htm', 'traces',
                             'trace_%s.csv' % exp_name)
    traces = loadTraces(file_path)
    num_records = len(traces['scalarValue'])

    # start and end for the x axis of the graphs
    if start_idx < 0:
        start = num_records + start_idx
    else:
        start = start_idx
    if end_idx < 0:
        end = num_records + end_idx
    else:
        end = end_idx
    xlim = [0, end - start]

    # input data
    sensor_values = traces['scalarValue'][start:end]
    categories = traces['label'][start:end]
    active_cells = traces['tmActiveCells'][start:end]
    predicted_active_cells = traces['tmPredictedActiveCells'][start:end]
    raw_anomaly_scores = traces['rawAnomalyScore'][start:end]
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma, raw_anomaly_score,
                                          moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    # generate sdrs to cluster
    active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (float(active_cells_weight) * np.array(active_cells_sdrs) +
            float(predicted_active_cells_weight) * predicted_active_cells_sdrs)

    # list of timesteps specifying when a snapshot of the clusters will be taken
    step = (end - start) // num_cluster_snapshots - 1
    cluster_snapshot_indices = range(step, end - start, step)

    # run clustering
    (clustering_accuracies, cluster_snapshots,
     closest_cluster_history) = run(sdrs, categories, anomaly_scores,
                                    distance_func, moving_average_window,
                                    max_num_clusters, ClusteringClass,
                                    merge_threshold, cluster_snapshot_indices)
    # cluster_categories = []
    # for c in closest_cluster_history:
    #   if c is not None:
    #     cluster_categories.append(c.label_distribution()[0]['label'])

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        snapshot_index = cluster_snapshot_indices[i]
        plot_cluster_assignments(plots_output_dir, clusters, snapshot_index)

        # plot inter-cluster distance matrix
        # plot_id = 'inter-cluster_t=%s' % snapshot_index
        # plot_inter_sequence_distances(plots_output_dir,
        #                               plot_id,
        #                               distance_func,
        #                               sdrs[:snapshot_index],
        #                               cluster_categories[:snapshot_index],
        #                               distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = 'inter-category_t=%s ' % snapshot_index
        plot_inter_sequence_distances(plots_output_dir, plot_id, distance_func,
                                      sdrs[:snapshot_index],
                                      categories[:snapshot_index],
                                      distance_matrix_ignore_noise)

    # plot clustering accuracy over time
    plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                      moving_average_window)
    plot_accuracy(plots_output_dir, plot_id, sensor_values, categories,
                  anomaly_scores, clustering_accuracies, xlim)

    if show_plots:
        plt.show()
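moving_average feeds its own previous output back in at every step, so it acts as a running average of the raw anomaly scores. A minimal sketch under that assumption (the real helper may instead keep an explicit window of recent values).

def moving_average_sketch(last_ma, new_value, window):
    # Exponential-style update: the window controls how quickly the
    # average tracks new values.
    return last_ma + (float(new_value) - last_ma) / float(window)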
Example #8
def main():
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClustering]
    network_config = 'sp=True_tm=True_tp=False_SDRClassifier'
    exp_names = [
        'binary_ampl=10.0_mean=0.0_noise=0.0',
        'binary_ampl=10.0_mean=0.0_noise=1.0', 'sensortag_z'
    ]

    # Exp params
    moving_average_window = 1  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[0]
    distance_func = distance_functions[0]
    exp_name = exp_names[0]
    start_idx = 0
    end_idx = 100
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 1
    max_num_clusters = 3
    num_cluster_snapshots = 2
    show_plots = False
    distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

    # Clean and create output directory for the graphs
    plots_output_dir = 'plots/%s' % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    sensor_values = traces['sensorValue'][start_idx:end_idx]
    categories = traces['actualCategory'][start_idx:end_idx]
    raw_anomaly_scores = traces['rawAnomalyScore'][start_idx:end_idx]
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma, raw_anomaly_score,
                                          moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    active_cells = traces['tmActiveCells'][start_idx:end_idx]
    predicted_active_cells = traces['tmPredictedActiveCells'][
        start_idx:end_idx]

    # generate sdrs to cluster
    active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (active_cells_weight * np.array(active_cells_sdrs) +
            predicted_active_cells_weight * predicted_active_cells_sdrs)

    # start and end for the x axis of the graphs
    start = start_idx
    if end_idx < 0:
        end = len(sdrs) - end_idx - 1
    else:
        end = end_idx
    xlim = [start, end]

    # list of timesteps specifying when a snapshot of the clusters will be taken
    step = (end - start) // num_cluster_snapshots - 1
    cluster_snapshot_indices = range(start + step, end, step)

    # run clustering
    (clustering_accuracies, cluster_snapshots,
     closest_cluster_history) = run(sdrs, categories, distance_func,
                                    moving_average_window, max_num_clusters,
                                    ClusteringClass, cluster_snapshot_indices)

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        plot_cluster_assignments(plots_output_dir, clusters,
                                 cluster_snapshot_indices[i])

        # plot inter-cluster distance matrix
        cluster_ids = [c.id for c in closest_cluster_history if c is not None]
        plot_id = 'inter-cluster_t=%s' % cluster_snapshot_indices[i]
        plot_inter_sequence_distances(
            plots_output_dir, plot_id, distance_func,
            sdrs[:cluster_snapshot_indices[i]],
            cluster_ids[:cluster_snapshot_indices[i]],
            distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = 'inter-category_t=%s ' % cluster_snapshot_indices[i]
        plot_inter_sequence_distances(plots_output_dir, plot_id, distance_func,
                                      sdrs[:cluster_snapshot_indices[i]],
                                      categories[:cluster_snapshot_indices[i]],
                                      distance_matrix_ignore_noise)

    # plot clustering accuracy over time
    plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                      moving_average_window)
    plot_accuracy(plots_output_dir, plot_id, sensor_values, categories,
                  anomaly_scores, clustering_accuracies, xlim)

    if show_plots:
        plt.show()
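euclidian_distance is the only distance function selected in these examples. A minimal sketch, assuming plain Euclidean (L2) distance between two SDR vectors of equal length; the weighting applied when building sdrs above carries straight through to this distance.

import numpy as np

def euclidian_distance_sketch(sdr1, sdr2):
    # L2 distance between two (possibly weighted) SDR vectors.
    diff = np.asarray(sdr1, dtype=float) - np.asarray(sdr2, dtype=float)
    return float(np.linalg.norm(diff))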
Example #9
                    default=32 * 2048,
                    dest="numTmCells",
                    help="Number of cells in the Temporal Memory")

  (options, remainder) = parser.parse_args()
  return options, remainder



if __name__ == "__main__":

  (_options, _args) = _getArgs()
  inputFile = _options.fileName

  plotTemporalMemoryStates = _options.plotTemporalMemoryStates

  if _options.xl:
    xl = [int(x) for x in _options.xl.split(',')]
  else:
    xl = _options.xl

  print(inputFile)
  traces = loadTraces(inputFile)

  numTmCells = _options.numTmCells

  title = inputFile.split('/')[-1]
  outputFile = '%s.png' % inputFile[:-4]
  plotTraces(xl, traces, title, ANOMALY_SCORE, outputFile, numTmCells,
             plotTemporalMemoryStates)
Example #10
def main():
  distance_functions = [euclidian_distance]
  clustering_classes = [PerfectClustering, OnlineClustering]
  network_config = 'sp=True_tm=True_tp=False_SDRClassifier'
  exp_names = ['binary_ampl=10.0_mean=0.0_noise=0.0',
               'binary_ampl=10.0_mean=0.0_noise=1.0',
               'sensortag_z']

  # Exp params
  moving_average_window = 1  # for all moving averages of the experiment
  ClusteringClass = clustering_classes[0]
  distance_func = distance_functions[0]
  exp_name = exp_names[0]
  start_idx = 0
  end_idx = 100
  input_width = 2048 * 32
  active_cells_weight = 0
  predicted_active_cells_weight = 1
  max_num_clusters = 3
  num_cluster_snapshots = 2
  show_plots = False
  distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

  # Clean and create output directory for the graphs
  plots_output_dir = 'plots/%s' % exp_name
  if os.path.exists(plots_output_dir):
    shutil.rmtree(plots_output_dir)
  os.makedirs(plots_output_dir)

  # load traces
  file_name = get_file_name(exp_name, network_config)
  traces = loadTraces(file_name)
  sensor_values = traces['sensorValue'][start_idx:end_idx]
  categories = traces['actualCategory'][start_idx:end_idx]
  raw_anomaly_scores = traces['rawAnomalyScore'][start_idx:end_idx]
  anomaly_scores = []
  anomaly_score_ma = 0.0
  for raw_anomaly_score in raw_anomaly_scores:
    anomaly_score_ma = moving_average(anomaly_score_ma,
                                      raw_anomaly_score,
                                      moving_average_window)
    anomaly_scores.append(anomaly_score_ma)

  active_cells = traces['tmActiveCells'][start_idx:end_idx]
  predicted_active_cells = traces['tmPredictedActiveCells'][start_idx:end_idx]

  # generate sdrs to cluster
  active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
  predicted_active_cells_sdrs = np.array(
    convert_to_sdrs(predicted_active_cells, input_width))
  sdrs = (active_cells_weight * np.array(active_cells_sdrs) +
          predicted_active_cells_weight * predicted_active_cells_sdrs)

  # start and end for the x axis of the graphs
  start = start_idx
  if end_idx < 0:
    end = len(sdrs) - end_idx - 1
  else:
    end = end_idx
  xlim = [start, end]

  # list of timesteps specifying when a snapshot of the clusters will be taken
  step = (end - start) // num_cluster_snapshots - 1
  cluster_snapshot_indices = range(start + step, end, step)

  # run clustering
  (clustering_accuracies,
   cluster_snapshots,
   closest_cluster_history) = run(sdrs,
                                  categories,
                                  distance_func,
                                  moving_average_window,
                                  max_num_clusters,
                                  ClusteringClass,
                                  cluster_snapshot_indices)

  # plot cluster assignments over time
  for i in range(num_cluster_snapshots):
    clusters = cluster_snapshots[i]
    plot_cluster_assignments(plots_output_dir, clusters, cluster_snapshot_indices[i])

    # plot inter-cluster distance matrix
    cluster_ids = [c.id for c in closest_cluster_history if c is not None]
    plot_id = 'inter-cluster_t=%s' % cluster_snapshot_indices[i]
    plot_inter_sequence_distances(plots_output_dir, 
                                  plot_id, 
                                  distance_func, 
                                  sdrs[:cluster_snapshot_indices[i]],
                                  cluster_ids[:cluster_snapshot_indices[i]], 
                                  distance_matrix_ignore_noise)

    # plot inter-category distance matrix
    plot_id = 'inter-category_t=%s ' % cluster_snapshot_indices[i]
    plot_inter_sequence_distances(plots_output_dir,
                                  plot_id,
                                  distance_func,
                                  sdrs[:cluster_snapshot_indices[i]],
                                  categories[:cluster_snapshot_indices[i]],
                                  distance_matrix_ignore_noise)

  # plot clustering accuracy over time
  plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                    moving_average_window)
  plot_accuracy(plots_output_dir,
                plot_id,
                sensor_values,
                categories,
                anomaly_scores,
                clustering_accuracies,
                xlim)

  if show_plots:
    plt.show()
Example #11
def main():
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClusteringV2]
    network_config = "sp=True_tm=True_tp=False_SDRClassifier"
    exp_names = [
        "body_acc_x",
        "binary_ampl=10.0_mean=0.0_noise=0.0",
        "binary_ampl=10.0_mean=0.0_noise=1.0",
        "sensortag_z",
    ]

    # Exp params
    moving_average_window = 2  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[1]
    distance_func = distance_functions[0]
    exp_name = exp_names[0]
    start_idx = 1000
    end_idx = 12000
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 10
    max_num_clusters = 3
    num_cluster_snapshots = 1
    show_plots = True
    distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

    # Clean and create output directory for the graphs
    plots_output_dir = "plots/%s" % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    num_records = len(traces["sensorValue"])

    # start and end for the x axis of the graphs
    if start_idx < 0:
        start = num_records + start_idx
    else:
        start = start_idx
    if end_idx < 0:
        end = num_records + end_idx
    else:
        end = end_idx
    xlim = [0, end - start]

    # input data
    sensor_values = traces["sensorValue"][start:end]
    categories = traces["actualCategory"][start:end]
    active_cells = traces["tmActiveCells"][start:end]
    predicted_active_cells = traces["tmPredictedActiveCells"][start:end]
    raw_anomaly_scores = traces["rawAnomalyScore"][start:end]
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma, raw_anomaly_score, moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    # generate sdrs to cluster
    active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
    predicted_active_cells_sdrs = np.array(convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (
        float(active_cells_weight) * np.array(active_cells_sdrs)
        + float(predicted_active_cells_weight) * predicted_active_cells_sdrs
    )

    # list of timesteps specifying when a snapshot of the clusters will be taken
    step = (end - start) // num_cluster_snapshots - 1
    cluster_snapshot_indices = range(step, end - start, step)

    # run clustering
    (clustering_accuracies, cluster_snapshots, closest_cluster_history) = run(
        sdrs,
        categories,
        anomaly_scores,
        distance_func,
        moving_average_window,
        max_num_clusters,
        ClusteringClass,
        cluster_snapshot_indices,
    )
    # cluster_categories = []
    # for c in closest_cluster_history:
    #   if c is not None:
    #     cluster_categories.append(c.label_distribution()[0]['label'])

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        snapshot_index = cluster_snapshot_indices[i]
        plot_cluster_assignments(plots_output_dir, clusters, snapshot_index)

        # plot inter-cluster distance matrix
        # plot_id = 'inter-cluster_t=%s' % snapshot_index
        # plot_inter_sequence_distances(plots_output_dir,
        #                               plot_id,
        #                               distance_func,
        #                               sdrs[:snapshot_index],
        #                               cluster_categories[:snapshot_index],
        #                               distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = "inter-category_t=%s " % snapshot_index
        plot_inter_sequence_distances(
            plots_output_dir,
            plot_id,
            distance_func,
            sdrs[:snapshot_index],
            categories[:snapshot_index],
            distance_matrix_ignore_noise,
        )

    # plot clustering accuracy over time
    plot_id = "file=%s | moving_average_window=%s" % (exp_name, moving_average_window)
    plot_accuracy(plots_output_dir, plot_id, sensor_values, categories, anomaly_scores, clustering_accuracies, xlim)

    if show_plots:
        plt.show()