Example #1
def fine_tune(num_fold, data):
    # First trainable layer index per backbone:
    # InceptionResNetV2 - 774, Xception - 126
    print('Fine-Tuning')
    isBest = True

    train_data, val_data = data[num_fold - 1][0], data[num_fold - 1][1]
    # Load the model saved during the initial training phase.
    model = load_model(get_model_name(num_fold))
    model.summary()
    # Unfreeze the backbone (model.layers[1]) from the configured layer
    # index onwards; earlier layers keep their existing frozen state.
    first_trainable = config.LAYERS_TO_TRAIN[config.MODELS[num_fold - 1]]
    for layer in model.layers[1].layers[first_trainable:]:
        layer.trainable = True
    model.summary()

    mc, reduce_lr = return_callbacks(num_fold, isBest)

    opt = return_opt('adam', learning_rate=1e-5)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(train_data,
                        validation_data=val_data,
                        epochs=20,
                        callbacks=[mc, reduce_lr],
                        steps_per_epoch=len(train_data))
    plot.plot_loss(history)
    plot.plot_accuracy(history)
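
This snippet relies on two config entries that are not shown. A minimal sketch of what they might contain, with the layer indices taken from the comments at the top of the function (names and structure are guesses at the project's config module):

# Hypothetical config module contents; values come from the comments above.
MODELS = ['InceptionResNetV2', 'Xception']
LAYERS_TO_TRAIN = {'InceptionResNetV2': 774, 'Xception': 126}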
Example #2
def part_3_4():
    """
    Test hopfield network's robustness on different settings of noisy data
    :param p: dictionary of different patterns
    """

    # Train with 3 patterns
    p_train = [p[0].flatten(), p[1].flatten(), p[2].flatten()]

    train_data = np.asarray(p_train)

    h_net = HopfieldNet(train_data)

    h_net.batch_train()

    # Choose a pattern and add noise to it
    test_pattern = 2  # Choose between 0, 1, 2

    p_test = [p[test_pattern].flatten()]

    test_data = np.asarray(p_test)

    # Noise percentages to test: 0% through 100% in 1% steps
    noise_percentages = np.arange(0, 101, 1)

    n_runs = 1
    runs = []
    for run in range(n_runs):
        acc = {}
        # Test for different percentages of noise
        for noise_perc in noise_percentages:
            # add noise to test data
            noisy_test_data = add_noise(test_data[0], noise_perc)
            # try to recall
            test_pred = h_net.recall([noisy_test_data], epochs=batch_epochs)

            acc[noise_perc] = calc_acc(test_data[0], test_pred[0])

            if show_plots:
                test_pred_1 = test_pred[0].reshape(32,
                                                   32)  # prepare for plotting

                show_tested(noisy_test_data,
                            test_pred_1,
                            test_pred_1.shape[0],
                            test_pred_1.shape[1],
                            title="Testing with " + str(noise_perc) +
                            "% noise")

        # plot_accuracy(acc)
        runs.append(acc)

    average_acc = {}
    for noise_perc in acc.keys():
        av_acc_i = 0.
        for run in range(n_runs):
            av_acc_i += runs[run][noise_perc]

        average_acc[noise_perc] = av_acc_i / float(n_runs)
    plot_accuracy(average_acc)
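
part_3_4 depends on add_noise and calc_acc, which are defined elsewhere. A minimal sketch of what they might look like, assuming bipolar (+1/-1) Hopfield patterns:

import numpy as np

def add_noise(pattern, noise_perc):
    # Hypothetical helper: flip noise_perc % of the units in a bipolar pattern.
    noisy = np.copy(pattern)
    n_flip = int(len(noisy) * noise_perc / 100.0)
    flip_idx = np.random.choice(len(noisy), n_flip, replace=False)
    noisy[flip_idx] *= -1
    return noisy

def calc_acc(target, prediction):
    # Hypothetical helper: fraction of units recalled correctly.
    return np.mean(np.asarray(target) == np.asarray(prediction))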
Example #3
def train(num_fold):
    # Train a facial-landmark model for the given fold.
    isBest = True
    data_frame = pd.read_csv(config.TRAIN_FACIAL_LANDMARKS_CSV_PATH,
                             header=None)

    data_frame = preprocess.customFacialLandmarks(data_frame,
                                                  config.CLASS_LABEL)

    X = data_frame.iloc[:, :-1].copy().values
    Y = data_frame.iloc[:, -1].copy().values.reshape(-1, 1)
    X_train, X_test, Y_train, Y_test = return_split(X, Y)

    model = model_dispatcher.return_model(num_fold)
    Y_train_enc, Y_test_enc = conver_to_ohe(Y_train, Y_test)
    print(Y_train_enc.shape, Y_test_enc.shape)

    mc, reduce_lr = return_callbacks(num_fold, isBest)

    opt = return_opt('adam', learning_rate=0.01)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(X_train,
                        Y_train_enc,
                        epochs=100,
                        validation_data=(X_test, Y_test_enc),
                        batch_size=config.BATCH_SIZE,
                        callbacks=[mc, reduce_lr])

    plot.plot_loss(history)
    plot.plot_accuracy(history)
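
conver_to_ohe is project-local and not shown. A sketch of what such a one-hot helper might be, assuming a scikit-learn encoder (the implementation is a guess):

from sklearn.preprocessing import OneHotEncoder

def conver_to_ohe(Y_train, Y_test):
    # Hypothetical implementation: fit on the training labels only, then
    # apply the same encoding to the test labels.
    enc = OneHotEncoder(sparse_output=False)  # use sparse=False on sklearn < 1.2
    return enc.fit_transform(Y_train), enc.transform(Y_test)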
Example #4
def main():
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClusteringV2]

    # Exp params
    moving_average_window = 2  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[1]
    distance_func = distance_functions[0]
    merge_threshold = 30  # Cutoff distance to merge clusters. 'None' to ignore.
    start_idx = 0
    end_idx = -1
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 10
    max_num_clusters = 3
    num_cluster_snapshots = 1
    show_plots = True
    distance_matrix_ignore_noise = False  # ignore label 0 if used to label noise.
    exp_name = 'body_acc_x_inertial_signals_train'

    # Clean and create the output directory for the graphs
    plots_output_dir = 'plots/%s' % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             os.pardir, 'htm', 'traces',
                             'trace_%s.csv' % exp_name)
    traces = loadTraces(file_path)
    num_records = len(traces['scalarValue'])

    # start and end for the x axis of the graphs
    if start_idx < 0:
        start = num_records + start_idx
    else:
        start = start_idx
    if end_idx < 0:
        end = num_records + end_idx
    else:
        end = end_idx
    xlim = [0, end - start]

    # input data
    sensor_values = traces['scalarValue'][start:end]
    categories = traces['label'][start:end]
    active_cells = traces['tmActiveCells'][start:end]
    predicted_active_cells = traces['tmPredictedActiveCells'][start:end]
    raw_anomaly_scores = traces['rawAnomalyScore'][start:end]
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma, raw_anomaly_score,
                                          moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    # generate sdrs to cluster
    active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (float(active_cells_weight) * np.array(active_cells_sdrs) +
            float(predicted_active_cells_weight) * predicted_active_cells_sdrs)

    # list of timesteps specifying when a snapshot of the clusters will be taken
    step = (end - start) // num_cluster_snapshots - 1  # integer division so range() gets ints
    cluster_snapshot_indices = range(step, end - start, step)

    # run clustering
    (clustering_accuracies, cluster_snapshots,
     closest_cluster_history) = run(sdrs, categories, anomaly_scores,
                                    distance_func, moving_average_window,
                                    max_num_clusters, ClusteringClass,
                                    merge_threshold, cluster_snapshot_indices)
    # cluster_categories = []
    # for c in closest_cluster_history:
    #   if c is not None:
    #     cluster_categories.append(c.label_distribution()[0]['label'])

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        snapshot_index = cluster_snapshot_indices[i]
        plot_cluster_assignments(plots_output_dir, clusters, snapshot_index)

        # plot inter-cluster distance matrix
        # plot_id = 'inter-cluster_t=%s' % snapshot_index
        # plot_inter_sequence_distances(plots_output_dir,
        #                               plot_id,
        #                               distance_func,
        #                               sdrs[:snapshot_index],
        #                               cluster_categories[:snapshot_index],
        #                               distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = 'inter-category_t=%s ' % snapshot_index
        plot_inter_sequence_distances(plots_output_dir, plot_id, distance_func,
                                      sdrs[:snapshot_index],
                                      categories[:snapshot_index],
                                      distance_matrix_ignore_noise)

    # plot clustering accuracy over time
    plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                      moving_average_window)
    plot_accuracy(plots_output_dir, plot_id, sensor_values, categories,
                  anomaly_scores, clustering_accuracies, xlim)

    if show_plots:
        plt.show()
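
moving_average appears in several of these examples but is defined elsewhere. A plausible sketch, assuming an incremental running average (this is a guess at the helper's behavior, not the original code):

def moving_average(last_ma, new_value, window):
    # Hypothetical: incremental update with an effective window of `window` points.
    return last_ma + (new_value - last_ma) / float(window)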
Example #5
model.add(layers.Dense(46, activation='softmax'))



# Validation
x_val = x_train[:1000]
partial_x_train = x_train[1000:]

# y_val = one_hot_train_labels[:1000]
# partial_y_train = one_hot_train_labels[1000:]
# model.compile(optimizer='rmsprop',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])


partial_y_train = train_labels[1000:]
y_val = train_labels[:1000]
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['acc'])
model.summary()
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))



import plot
plot.plot_accuracy(history)
plot.plot_loss(history)
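
The project-local plot module imported above is not shown. A minimal sketch of what plot_accuracy and plot_loss might do with a Keras History object (an assumption, not the original module):

import matplotlib.pyplot as plt

def plot_accuracy(history):
    # Handle either metric name ('acc' in older Keras, 'accuracy' in newer).
    key = 'acc' if 'acc' in history.history else 'accuracy'
    plt.plot(history.history[key], label='train')
    plt.plot(history.history['val_' + key], label='validation')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()

def plot_loss(history):
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='validation')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.show()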
Example #6
def main():
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClustering]
    network_config = 'sp=True_tm=True_tp=False_SDRClassifier'
    exp_names = [
        'binary_ampl=10.0_mean=0.0_noise=0.0',
        'binary_ampl=10.0_mean=0.0_noise=1.0', 'sensortag_z'
    ]

    # Exp params
    moving_average_window = 1  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[0]
    distance_func = distance_functions[0]
    exp_name = exp_names[0]
    start_idx = 0
    end_idx = 100
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 1
    max_num_clusters = 3
    num_cluster_snapshots = 2
    show_plots = False
    distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

    # Clean and create the output directory for the graphs
    plots_output_dir = 'plots/%s' % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    sensor_values = traces['sensorValue'][start_idx:end_idx]
    categories = traces['actualCategory'][start_idx:end_idx]
    raw_anomaly_scores = traces['rawAnomalyScore'][start_idx:end_idx]
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma, raw_anomaly_score,
                                          moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    active_cells = traces['tmActiveCells'][start_idx:end_idx]
    predicted_active_cells = traces['tmPredictedActiveCells'][
        start_idx:end_idx]

    # generate sdrs to cluster
    active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
    predicted_active_cells_sdrs = np.array(
        convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (active_cells_weight * np.array(active_cells_sdrs) +
            predicted_active_cells_weight * predicted_active_cells_sdrs)

    # start and end for the x axis of the graphs
    start = start_idx
    if end_idx < 0:
        end = len(sdrs) - end_idx - 1
    else:
        end = end_idx
    xlim = [start, end]

    # list of timesteps specifying when a snapshot of the clusters will be taken
    step = (end - start) // num_cluster_snapshots - 1  # integer division so range() gets ints
    cluster_snapshot_indices = range(start + step, end, step)

    # run clustering
    (clustering_accuracies, cluster_snapshots,
     closest_cluster_history) = run(sdrs, categories, distance_func,
                                    moving_average_window, max_num_clusters,
                                    ClusteringClass, cluster_snapshot_indices)

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        plot_cluster_assignments(plots_output_dir, clusters,
                                 cluster_snapshot_indices[i])

        # plot inter-cluster distance matrix
        cluster_ids = [c.id for c in closest_cluster_history if c is not None]
        plot_id = 'inter-cluster_t=%s' % cluster_snapshot_indices[i]
        plot_inter_sequence_distances(
            plots_output_dir, plot_id, distance_func,
            sdrs[:cluster_snapshot_indices[i]],
            cluster_ids[:cluster_snapshot_indices[i]],
            distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = 'inter-category_t=%s ' % cluster_snapshot_indices[i]
        plot_inter_sequence_distances(plots_output_dir, plot_id, distance_func,
                                      sdrs[:cluster_snapshot_indices[i]],
                                      categories[:cluster_snapshot_indices[i]],
                                      distance_matrix_ignore_noise)

    # plot clustering accuracy over time
    plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                      moving_average_window)
    plot_accuracy(plots_output_dir, plot_id, sensor_values, categories,
                  anomaly_scores, clustering_accuracies, xlim)

    if show_plots:
        plt.show()
Example #7
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10000, )))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46, activation='softmax'))

# Validation
x_val = x_train[:1000]
partial_x_train = x_train[1000:]

# y_val = one_hot_train_labels[:1000]
# partial_y_train = one_hot_train_labels[1000:]
# model.compile(optimizer='rmsprop',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

partial_y_train = train_labels[1000:]
y_val = train_labels[:1000]
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['acc'])
model.summary()
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))

import plot
plot.plot_accuracy(history)
plot.plot_loss(history)
Example #8
def main():
  distance_functions = [euclidian_distance]
  clustering_classes = [PerfectClustering, OnlineClustering]
  network_config = 'sp=True_tm=True_tp=False_SDRClassifier'
  exp_names = ['binary_ampl=10.0_mean=0.0_noise=0.0',
               'binary_ampl=10.0_mean=0.0_noise=1.0',
               'sensortag_z']

  # Exp params
  moving_average_window = 1  # for all moving averages of the experiment
  ClusteringClass = clustering_classes[0]
  distance_func = distance_functions[0]
  exp_name = exp_names[0]
  start_idx = 0
  end_idx = 100
  input_width = 2048 * 32
  active_cells_weight = 0
  predicted_active_cells_weight = 1
  max_num_clusters = 3
  num_cluster_snapshots = 2
  show_plots = False
  distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

  # Clean and create the output directory for the graphs
  plots_output_dir = 'plots/%s' % exp_name
  if os.path.exists(plots_output_dir):
    shutil.rmtree(plots_output_dir)
  os.makedirs(plots_output_dir)

  # load traces
  file_name = get_file_name(exp_name, network_config)
  traces = loadTraces(file_name)
  sensor_values = traces['sensorValue'][start_idx:end_idx]
  categories = traces['actualCategory'][start_idx:end_idx]
  raw_anomaly_scores = traces['rawAnomalyScore'][start_idx:end_idx]
  anomaly_scores = []
  anomaly_score_ma = 0.0
  for raw_anomaly_score in raw_anomaly_scores:
    anomaly_score_ma = moving_average(anomaly_score_ma,
                                      raw_anomaly_score,
                                      moving_average_window)
    anomaly_scores.append(anomaly_score_ma)

  active_cells = traces['tmActiveCells'][start_idx:end_idx]
  predicted_active_cells = traces['tmPredictedActiveCells'][start_idx:end_idx]

  # generate sdrs to cluster
  active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
  predicted_active_cells_sdrs = np.array(
      convert_to_sdrs(predicted_active_cells, input_width))
  sdrs = (active_cells_weight * np.array(active_cells_sdrs) +
          predicted_active_cells_weight * predicted_active_cells_sdrs)

  # start and end for the x axis of the graphs
  start = start_idx
  if end_idx < 0:
    end = len(sdrs) - end_idx - 1
  else:
    end = end_idx
  xlim = [start, end]

  # list of timesteps specifying when a snapshot of the clusters will be taken
  step = (end - start) // num_cluster_snapshots - 1  # integer division so range() gets ints
  cluster_snapshot_indices = range(start + step, end, step)

  # run clustering
  (clustering_accuracies,
   cluster_snapshots,
   closest_cluster_history) = run(sdrs,
                                  categories,
                                  distance_func,
                                  moving_average_window,
                                  max_num_clusters,
                                  ClusteringClass,
                                  cluster_snapshot_indices)

  # plot cluster assignments over time
  for i in range(num_cluster_snapshots):
    clusters = cluster_snapshots[i]
    plot_cluster_assignments(plots_output_dir, clusters, cluster_snapshot_indices[i])

    # plot inter-cluster distance matrix
    cluster_ids = [c.id for c in closest_cluster_history if c is not None]
    plot_id = 'inter-cluster_t=%s' % cluster_snapshot_indices[i]
    plot_inter_sequence_distances(plots_output_dir, 
                                  plot_id, 
                                  distance_func, 
                                  sdrs[:cluster_snapshot_indices[i]],
                                  cluster_ids[:cluster_snapshot_indices[i]], 
                                  distance_matrix_ignore_noise)

    # plot inter-category distance matrix
    plot_id = 'inter-category_t=%s ' % cluster_snapshot_indices[i]
    plot_inter_sequence_distances(plots_output_dir,
                                  plot_id,
                                  distance_func,
                                  sdrs[:cluster_snapshot_indices[i]],
                                  categories[:cluster_snapshot_indices[i]],
                                  distance_matrix_ignore_noise)

  # plot clustering accuracy over time
  plot_id = 'file=%s | moving_average_window=%s' % (exp_name,
                                                    moving_average_window)
  plot_accuracy(plots_output_dir,
                plot_id,
                sensor_values,
                categories,
                anomaly_scores,
                clustering_accuracies,
                xlim)

  if show_plots:
    plt.show()
Example #9
# Linear SVM: classification for C in [10^-3, 10^3]
accuracy_list = []
classificators = []
# Plotter
my_plt = plotting_grid(fig_r=12, fig_c=11, grid_r=4, grid_c=2)

for j, c in enumerate(C_value):
    clf = svm.SVC(kernel='linear', C=c).fit(X_train, y_train)
    accuracy_list.append(clf.score(X_validation, y_validation))
    classificators.append(clf)
    my_plt.plot(X_validation, clf, "Classification for C = {}".format(c), j)

my_plt.save("classification.png")
my_plt.show()

plot_accuracy(accuracy_list)
print(tabulate([accuracy_list]))

#best_C=C_value[]
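# A hedged completion of the commented-out line above: pick the C with the
# highest validation accuracy (this selection rule is an assumption, not
# the original author's code).
best_C = C_value[accuracy_list.index(max(accuracy_list))]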
for j, c in enumerate(C_value):
    title = "Best classification with C = {}".format(c)
    # Fit with this iteration's C; the original reused the stale `counter`
    # variable from the previous loop, so every iteration trained the same model.
    clf_best = svm.SVC(kernel='linear', C=c).fit(X_train, y_train)
    plot(X_validation, clf_best, title)
    print("\n\nAccuracy on test data : " +
          str(clf_best.score(X_test, y_test) * 100) + " % \n")

#    NON-LINEAR SVM - RBF
###############################################################################
# Define accuracy
accuracy_list = []
#Classification for C in [10^-3,10^3]
Example #10
def train():
    print('Starting Training')
    data_frame = pd.read_csv(config.TRAIN_CSV_PATH)
    Y = data_frame[['Label']].copy()
    num_fold = 1
    data = dict()
    isBest = False
    for train_idx, val_idx in return_split(Y):
        if num_fold in config.LIST_OF_FOLD_EXCEPTIONS:
            import train_facialLandmarks
            train_facialLandmarks.train(num_fold)
        else:
            train_df = data_frame.iloc[train_idx]
            val_df = data_frame.iloc[val_idx]
            train_datagen, val_datagen = return_gen(num_fold)
            # Look up the input size for this fold's backbone once.
            size = config.TARGET_SIZE[config.MODELS[num_fold - 1]]

            train_data = train_datagen.flow_from_dataframe(
                dataframe=train_df,
                directory=None,
                x_col="Image",
                y_col="Label",
                target_size=(size[0], size[1]),
                class_mode="categorical",
                shuffle=True,
                batch_size=config.BATCH_SIZE,
                seed=42)

            val_data = val_datagen.flow_from_dataframe(
                dataframe=val_df,
                directory=None,
                x_col="Image",
                y_col="Label",
                target_size=(size[0], size[1]),
                class_mode="categorical",
                shuffle=True,
                batch_size=config.BATCH_SIZE,
                seed=42)
            print(train_data.class_indices)
            model = model_dispatcher.return_model(num_fold)
            mc, reduce_lr = return_callbacks(num_fold, isBest)

            opt = return_opt('adam', learning_rate=0.01)
            model.compile(optimizer=opt,
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            history = model.fit(train_data,
                                validation_data=val_data,
                                epochs=1,
                                callbacks=[mc, reduce_lr],
                                steps_per_epoch=len(train_data))

            plot.plot_loss(history)
            plot.plot_accuracy(history)
            model = load_model(get_model_name(num_fold, isBest))
            results = model.evaluate(val_data)
            results = dict(zip(model.metrics_names, results))

            data[num_fold] = [train_data, val_data]

        num_fold += 1
        tf.keras.backend.clear_session()
        if num_fold > len(config.MODELS):
            break

    return data
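
return_split here yields (train_idx, val_idx) pairs per fold, unlike the four-way split of the same name in Example #3. A sketch of what this variant might be, assuming a stratified K-fold (the helper is a guess, not the project's code):

import numpy as np
from sklearn.model_selection import StratifiedKFold

def return_split(Y, n_splits=5):
    # Hypothetical: one (train_idx, val_idx) pair per fold, stratified on labels.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    return skf.split(np.zeros((len(Y), 1)), np.ravel(Y))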
Example #11
def main():
    distance_functions = [euclidian_distance]
    clustering_classes = [PerfectClustering, OnlineClusteringV2]
    network_config = "sp=True_tm=True_tp=False_SDRClassifier"
    exp_names = [
        "body_acc_x",
        "binary_ampl=10.0_mean=0.0_noise=0.0",
        "binary_ampl=10.0_mean=0.0_noise=1.0",
        "sensortag_z",
    ]

    # Exp params
    moving_average_window = 2  # for all moving averages of the experiment
    ClusteringClass = clustering_classes[1]
    distance_func = distance_functions[0]
    exp_name = exp_names[0]
    start_idx = 1000
    end_idx = 12000
    input_width = 2048 * 32
    active_cells_weight = 0
    predicted_active_cells_weight = 10
    max_num_clusters = 3
    num_cluster_snapshots = 1
    show_plots = True
    distance_matrix_ignore_noise = True  # whether to ignore label 0 (noise)

    # Clean and create the output directory for the graphs
    plots_output_dir = "plots/%s" % exp_name
    if os.path.exists(plots_output_dir):
        shutil.rmtree(plots_output_dir)
    os.makedirs(plots_output_dir)

    # load traces
    file_name = get_file_name(exp_name, network_config)
    traces = loadTraces(file_name)
    num_records = len(traces["sensorValue"])

    # start and end for the x axis of the graphs
    if start_idx < 0:
        start = num_records + start_idx
    else:
        start = start_idx
    if end_idx < 0:
        end = num_records + end_idx
    else:
        end = end_idx
    xlim = [0, end - start]

    # input data
    sensor_values = traces["sensorValue"][start:end]
    categories = traces["actualCategory"][start:end]
    active_cells = traces["tmActiveCells"][start:end]
    predicted_active_cells = traces["tmPredictedActiveCells"][start:end]
    raw_anomaly_scores = traces["rawAnomalyScore"][start:end]
    anomaly_scores = []
    anomaly_score_ma = 0.0
    for raw_anomaly_score in raw_anomaly_scores:
        anomaly_score_ma = moving_average(anomaly_score_ma, raw_anomaly_score, moving_average_window)
        anomaly_scores.append(anomaly_score_ma)

    # generate sdrs to cluster
    active_cells_sdrs = convert_to_sdrs(active_cells, input_width)
    predicted_active_cells_sdrs = np.array(convert_to_sdrs(predicted_active_cells, input_width))
    sdrs = (
        float(active_cells_weight) * np.array(active_cells_sdrs)
        + float(predicted_active_cells_weight) * predicted_active_cells_sdrs
    )

    # list of timesteps specifying when a snapshot of the clusters will be taken
    step = (end - start) // num_cluster_snapshots - 1  # integer division so range() gets ints
    cluster_snapshot_indices = range(step, end - start, step)

    # run clustering
    (clustering_accuracies, cluster_snapshots, closest_cluster_history) = run(
        sdrs,
        categories,
        anomaly_scores,
        distance_func,
        moving_average_window,
        max_num_clusters,
        ClusteringClass,
        cluster_snapshot_indices,
    )
    # cluster_categories = []
    # for c in closest_cluster_history:
    #   if c is not None:
    #     cluster_categories.append(c.label_distribution()[0]['label'])

    # plot cluster assignments over time
    for i in range(num_cluster_snapshots):
        clusters = cluster_snapshots[i]
        snapshot_index = cluster_snapshot_indices[i]
        plot_cluster_assignments(plots_output_dir, clusters, snapshot_index)

        # plot inter-cluster distance matrix
        # plot_id = 'inter-cluster_t=%s' % snapshot_index
        # plot_inter_sequence_distances(plots_output_dir,
        #                               plot_id,
        #                               distance_func,
        #                               sdrs[:snapshot_index],
        #                               cluster_categories[:snapshot_index],
        #                               distance_matrix_ignore_noise)

        # plot inter-category distance matrix
        plot_id = "inter-category_t=%s " % snapshot_index
        plot_inter_sequence_distances(
            plots_output_dir,
            plot_id,
            distance_func,
            sdrs[:snapshot_index],
            categories[:snapshot_index],
            distance_matrix_ignore_noise,
        )

    # plot clustering accuracy over time
    plot_id = "file=%s | moving_average_window=%s" % (exp_name, moving_average_window)
    plot_accuracy(plots_output_dir, plot_id, sensor_values, categories, anomaly_scores, clustering_accuracies, xlim)

    if show_plots:
        plt.show()