def evaluate(self, metric):
		if self.true_labels == 0:
			print("Error: A true_labels.txt file is needed")
			return
		
		if metric == "nmi":
			print("NMI: %f" % nmi(self.true_labels, self.predicted_labels))
		elif metric == "purity":
			print("Purity: %f" % purity(self.true_labels, self.predicted_labels))
		elif metric == "both":
			print("NMI: %f" % nmi(self.true_labels, self.predicted_labels))
			print("Purity: %f" % purity(self.true_labels, self.predicted_labels))
		else:
			print("Error: This metric is not available. Choose among the following options: 'nmi', 'purity', 'both'")
Example No. 2
 def nmi(self):
     """
     Return the Normalized Mutual Information (NMI) of the clustering 
     solution with respect to the ground-truth labels given.
     """
     z = np.argmax(self.pi_nk, 1)
     return nmi(z, self.label)
def cluster(o, n_clusters=3):
    df = pd.read_pickle(o)
    reprsn = df['embedding'].values
    node_idx = df['node_id'].values
    labels = ry1()
    reprsn = [np.asarray(row, dtype='float32') for row in reprsn]
    reprsn = np.array(reprsn, dtype='float32')
    true_labels = [labels[int(node)] for node in node_idx]
    data = reprsn
    km = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    km.fit(data)

    km_means_labels = km.labels_
    km_means_cluster_centers = km.cluster_centers_
    km_means_labels_unique = np.unique(km_means_labels)
    '''colors_ = cycle(colors.cnames.keys())

    m,initial_dim = np.shape(data)
    data_2 = tsne(data, 2, initial_dim, 30)

    plt.figure(figsize=(12, 6))
    plt.scatter(data_2[:, 0], data_2[:, 1], c=true_labels)
    plt.title('True Labels')
    plt.show()'''

    nmiv = nmi(true_labels, km_means_labels)
    print('NMI value:', nmiv)

    return nmiv  # compute the NMI score on the validation data
Example No. 4
def nmi_matrix(df):
    mat = pd.DataFrame(index=df.columns, columns=df.columns, data=0.0)
    for i in df.columns:
        for j in df.columns:
            mat.loc[i, j] = nmi(df[i], df[j])

    return mat
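A quick usage sketch for nmi_matrix; the column names and random labels below are made up for illustration:

import numpy as np
import pandas as pd
from sklearn.metrics import normalized_mutual_info_score as nmi

rng = np.random.default_rng(0)
df = pd.DataFrame({
    'kmeans': rng.integers(0, 3, size=100),
    'spectral': rng.integers(0, 3, size=100),
})
print(nmi_matrix(df))  # symmetric, with 1.0 on the diagonal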
Example No. 5
    def benchmark(self, name: str, features: np.ndarray,
                  labels: np.ndarray) -> Tuple[str, Dict]:
        """
        Returns the clustering performance results in str and dict format.

        The metrics used are as follows:
            1. Duration
            2. Adjusted RAND Score
            3. Normalized Mutual Information
            4. Davies-Bouldin Index
            5. Silhouette Score
            6. Calinski-Harabasz Score
            7. Clustering Accuracy

        Parameters
        ----------
        name: str
            The name of the benchmark.
        features: np.ndarray
            The test instances to cluster.
        labels: np.ndarray
            The test labels.

        Returns
        -------
        str
            The formatted string of the benchmark results.
        results: Dict
            The dictionary of benchmark results.
        """
        start_time = time.time()
        predictions = self.predict(features)

        results = {}

        results["name"] = name
        results["duration"] = time.time() - start_time
        results["ari"] = ari(labels_true=labels, labels_pred=predictions)
        results["nmi"] = nmi(labels_true=labels, labels_pred=predictions)
        results["dbi"] = davies_bouldin_score(features, predictions)
        results["silhouette"] = silhouette_score(features,
                                                 predictions,
                                                 metric="euclidean")
        results["ch_score"] = calinski_harabasz_score(features, predictions)
        results["clustering_accuracy"] = clustering_accuracy(
            target=labels, prediction=predictions)

        return (
            "%-9s\t%.2fs\t%.3f\t\t%.3f\t\t%.3f\t\t%.3f\t\t%.3f\t\t%.3f" % (
                results.get("name"),
                results.get("duration"),
                results.get("dbi"),
                results.get("silhouette"),
                results.get("ch_score"),
                results.get("nmi"),
                results.get("ari"),
                results.get("clustering_accuracy"),
            ),
            results,
        )
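A stand-alone sketch of the same metrics on synthetic data; everything below is illustrative, and clustering_accuracy is covered by the Munkres-based sketch further down the page:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import (adjusted_rand_score as ari,
                             normalized_mutual_info_score as nmi,
                             davies_bouldin_score, silhouette_score,
                             calinski_harabasz_score)

features, labels = make_blobs(n_samples=300, centers=3, random_state=0)
predictions = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(features)
print("ARI:", ari(labels_true=labels, labels_pred=predictions))
print("NMI:", nmi(labels_true=labels, labels_pred=predictions))
print("DBI:", davies_bouldin_score(features, predictions))
print("Silhouette:", silhouette_score(features, predictions, metric="euclidean"))
print("CH:", calinski_harabasz_score(features, predictions))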
Example No. 6
def get_accuracy(cluster_assignments, y_true, n_clusters):
    '''
    Computes the accuracy based on the provided kmeans cluster assignments
    and true labels, using the Munkres algorithm

    cluster_assignments:    array of labels, outputted by kmeans
    y_true:                 true labels
    n_clusters:             number of clusters in the dataset

    returns:    a tuple containing the accuracy and confusion matrix,
                in that order
    '''
    y_pred, confusion_matrix = get_y_preds(cluster_assignments, y_true,
                                           n_clusters)

    from sklearn.metrics import normalized_mutual_info_score as nmi
    nmi_score = nmi(y_true, y_pred)
    print('NMI: ' + str(np.round(nmi_score, 4)))

    from sklearn.metrics import adjusted_rand_score as ari
    ari_score = ari(y_true, y_pred)
    print('ARI: ' + str(np.round(ari_score, 4)))

    ProjectDir = get_project_root()
    with open(os.path.join(ProjectDir, 'Results.txt'), 'a') as my_file:
        my_file.write("\n")
        my_file.write('NMI: ' + str(np.round(nmi_score, 4)))
        my_file.write("\n")
        my_file.write('ARI: ' + str(np.round(ari_score, 4)))
        my_file.write("\n")

    # calculate the accuracy
    return np.mean(y_pred == y_true), confusion_matrix
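get_y_preds is assumed above. A hedged sketch of the Munkres (Hungarian) matching step it presumably performs, assuming cluster ids run 0..n_clusters-1 and the number of clusters equals the number of classes; match_cluster_labels is a hypothetical name, not the original helper:

import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics.cluster import contingency_matrix

def match_cluster_labels(cluster_assignments, y_true):
    # Contingency matrix: rows are true classes, columns are clusters.
    cm = contingency_matrix(y_true, cluster_assignments)
    # Hungarian algorithm on the negated counts maximizes agreement.
    row_ind, col_ind = linear_sum_assignment(-cm)
    mapping = dict(zip(col_ind, row_ind))
    y_pred = np.array([mapping[c] for c in cluster_assignments])
    return y_pred, cm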
def run_net(data, params):
    #
    # UNPACK DATA
    #

    x_train_unlabeled, y_train_unlabeled, x_val, y_val, x_test, y_test = data[
        'spectral']['train_and_test']

    inputs_vae = Input(shape=(params['img_dim'], params['img_dim'], 1),
                       name='inputs_vae')
    ConvAE = Conv.ConvAE(inputs_vae, params)
    ConvAE.vae.load_weights('vae_mnist.h5')

    lh = LearningHandler(lr=params['spec_lr'],
                         drop=params['spec_drop'],
                         lr_tensor=ConvAE.learning_rate,
                         patience=params['spec_patience'])

    lh.on_train_begin()

    losses_vae = np.empty((500, ))
    for i in range(500):
        x_val_y = ConvAE.vae.predict(x_val)[2]
        losses_vae[i] = ConvAE.train_vae(x_val, x_val_y, params['batch_size'])
        x_val_y = ConvAE.vae.predict(x_val)[2]
        y_sp = x_val_y.argmax(axis=1)
        print_accuracy(y_sp, y_val, params['n_clusters'])
        print("Epoch: {}, loss={:.2f}".format(i, losses_vae[i]))

        if i > 1:
            if np.abs(losses_vae[i] - losses_vae[i - 1]) < 0.0001:
                print('STOPPING EARLY')
                break

        # if self.lh.on_epoch_end(i, val_losses[i]):
        #     print('STOPPING EARLY')
        # break
    # print training status

    # ConvAE.vae.save_weights('IJCAI_mnist2.h5')
    # spectral_net.net.save_weight('save.h5')
    # spectral_net.save
    print("finished training")

    x_val_y = ConvAE.vae.predict(x_val)[2]
    # x_val_y = ConvAE.classfier.predict(x_val_lp)
    y_sp = x_val_y.argmax(axis=1)

    print_accuracy(y_sp, y_val, params['n_clusters'])
    from sklearn.metrics import normalized_mutual_info_score as nmi
    nmi_score1 = nmi(y_sp, y_val)
    print('NMI: ' + str(np.round(nmi_score1, 4)))
Example No. 8
 def evaluate_model(self):
     """calculates NMI
     Arguments:
         ground_truth,
         predicted_values
     """
     nmi_eval = nmi(self.ground_truth, self.predicted)
     print(f"NMI Accuracy is: {nmi_eval}")
def metriques(model, y_true, y_pred):
    pred1 = model.row_labels_
    nmi_ = nmi(y_true, pred1)
    ari_ = ari(y_true, pred1)
    accuracy = ACCURACY(y_true, pred1)
    print("NMI: {}\nARI: {} ".format(nmi_, ari_))
    print("ACCURACY: %s" % accuracy)
    return nmi_, ari_, accuracy
Example No. 10
def run_experiment(ae_model_path):
    banner = "+" * 82
    logger.info(banner)
    logger.info(banner)
    logger.info(f"Working now on {ae_model_path.name}")
    logger.info(banner)
    logger.info(banner)
    new_seed = random.randint(0, 1000)
    logger.info(f"Seed value for this is: {new_seed}")
    set_random_seed(new_seed)

    ae_module = stacked_ae(pt_data.shape[1], [500, 500, 2000, 10],
                           weight_initalizer=torch.nn.init.xavier_normal_,
                           activation_fn=lambda x: F.relu(x),
                           loss_fn=None,
                           optimizer_fn=None)

    model_data = torch.load(ae_model_path, map_location='cpu')
    ae_module.load_state_dict(model_data)
    ae_module = ae_module.cuda()

    # Get embedded data
    embedded_data = None
    for batch_data in torch.utils.data.DataLoader(pt_data,
                                                  batch_size=256,
                                                  shuffle=False):
        embedded_batch_np = ae_module.forward(
            batch_data.cuda())[0].detach().cpu().numpy()
        if embedded_data is None:
            embedded_data = embedded_batch_np
        else:
            embedded_data = np.concatenate([embedded_data, embedded_batch_np],
                                           0)
    del ae_module

    # Perform k-means
    k_means_labels = k_means(embedded_data, n_clusters, n_init=20)[1]

    k_means_nmi_value = nmi(gold_labels,
                            k_means_labels,
                            average_method='arithmetic')
    k_means_acc_value = cluster_acc(gold_labels, k_means_labels)[0]

    result_file = Path(f"{result_dir}/results_ae_kmeans_{dataset_name}.txt")
    result_file_exists = result_file.exists()
    f = open(result_file, "a+")
    if not result_file_exists:
        f.write("#\"ae_model_name\"\t\"NMI\"\t\"ACC\"\n")
    f.write(
        f"{ae_model_path.name}\t{k_means_nmi_value}\t{k_means_acc_value}\n")
    f.close()
def tensfact_baseline():
    n_clusters = 81
    X_buzz = np.load('buzz_user_tensor_45.npy')
    print(X_buzz.shape)

    X_buzz = X_buzz[list(buzz_ground.keys())]
    buzz_ground1 = list(buzz_ground.values())

    km = KMeans(n_clusters=81, init='k-means++', n_init=1, verbose=False)
    sc = 0.0
    sc1 = 0.0
    sc2 = 0.0
    for i in range(10):
        km.fit(X_buzz)
        sc += nmi(buzz_ground1, km.labels_)
        sc1 += ari(buzz_ground1, km.labels_)
        sc2 += ami(buzz_ground1, km.labels_)

    print("BUZZ")
    print("nmi score %f" % (sc / 10.0))
    print("ari score %f" % (sc1 / 10.0))
    print("ami score %f" % (sc2 / 10.0))

    X_poli = np.load('poli_user_tensor_75.npy')
    print(X_poli.shape)
    X_poli = X_poli[list(poli_ground.keys())]
    poli_ground1 = list(poli_ground.values())
    km1 = KMeans(n_clusters=310, init='k-means++', n_init=1, verbose=False)
    sc = 0.0
    sc1 = 0.0
    sc2 = 0.0
    for i in range(10):
        km1.fit(X_poli)
        sc += nmi(poli_ground1, km1.labels_)
        sc1 += ari(poli_ground1, km1.labels_)
        sc2 += ami(poli_ground1, km1.labels_)

    print("poli")
    print("nmi score %f" % (sc / 10.0))
    print("ari score %f" % (sc1 / 10.0))
    print("ami score %f" % (sc2 / 10.0))
Example No. 12
def NMI(y_true, y_pred):
    b, _, _, ch = y_true.shape
    output = np.zeros((b, ch))
    for b_idx in range(b):
        for ch_idx in range(ch):
            # Rescale each channel to 8-bit labels before scoring.
            true_max = y_true[b_idx, ..., ch_idx].max()
            pred_max = y_pred[b_idx, ..., ch_idx].max()
            true_q = (y_true[b_idx, ..., ch_idx] / true_max * 255).astype('uint8').ravel()
            pred_q = (y_pred[b_idx, ..., ch_idx] / pred_max * 255).astype('uint8').ravel()
            output[b_idx, ch_idx] = nmi(true_q, pred_q)
    return output
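A quick sanity check of the per-channel NMI above on random data (shapes and the sklearn alias are illustrative):

import numpy as np
from sklearn.metrics import normalized_mutual_info_score as nmi

rng = np.random.default_rng(0)
a = rng.random((2, 32, 32, 3))
print(NMI(a, a))                           # identical inputs: 1.0 per channel
print(NMI(a, rng.random((2, 32, 32, 3))))  # unrelated inputs: lower scores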
Example No. 13
def execute_algo(model, model_name, X, y, verbose=True):
    print("##############\n# {}\n##############".format(model_name))
    model.fit(X)
    res_nmi = nmi(model.row_labels_, y)
    res_ari = ari(model.row_labels_, y)
    res_acc = accuracy(model.row_labels_, y)
    if verbose:
        print("NMI =", res_nmi)
        print("ARI =", res_ari)
        print("ACC =", res_acc)
    return res_nmi, res_ari, res_acc
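execute_algo expects a model exposing fit(X) and row_labels_. A hedged usage sketch with a small adapter around KMeans; the adapter class is hypothetical, and the nmi/ari/accuracy helpers are assumed to already be in scope:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

class KMeansRowModel:
    # Hypothetical adapter giving KMeans the fit()/row_labels_ interface.
    def __init__(self, n_clusters):
        self._km = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)

    def fit(self, X):
        self._km.fit(X)
        self.row_labels_ = self._km.labels_
        return self

X, y = make_blobs(n_samples=300, centers=3, random_state=0)
execute_algo(KMeansRowModel(3), "KMeans baseline", X, y)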
Example No. 14
    def train(self):
        x, y = np.load('images/64px_image_x.npy'), np.load(
            'images/64px_image_y.npy')
        x = np.reshape(x, (40000, 64, 64, 1))
        kmeans = KMeans(n_clusters=2, n_init=20)
        y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(y_pred)
        self.model.get_layer(name='clustering').set_weights(
            [kmeans.cluster_centers_])

        loss = 0
        ae_loss = 0
        index = 0
        maxiter = 80000
        update_interval = 100
        index_array = np.arange(x.shape[0])
        batch_size = 16
        tol = 0.001

        # model.load_weights('DEC_model_final.h5')

        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q = self.model.predict(x, verbose=0)
                # update the auxiliary target distribution p
                p = self.target_distribution(q)

                # evaluate the clustering performance
                y_pred = q.argmax(1)
                if y is not None:
                    acc = np.round(metrics.acc(y, y_pred), 5)
                    nmi = np.round(metrics.nmi(y, y_pred), 5)
                    ari = np.round(metrics.ari(y, y_pred), 5)
                    loss = np.round(loss, 5)
                    print(
                        'Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f, loss=%.5f'
                        % (ite, acc, nmi, ari, loss))

                # check stop criterion - model convergence
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / y_pred.shape[0]
                y_pred_last = np.copy(y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    break
            idx = np.random.randint(low=0, high=x.shape[0], size=batch_size)
            # ae_loss = ae.train_on_batch(x=x[idx], y=x[idx])
            loss = self.model.train_on_batch(x=x[idx], y=p[idx])
            index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0

        self.model.save_weights('DEC_model_final_64px.h5')
        self.test_model()
Example No. 15
 def on_epoch_end(self, epoch, logs=None):
     if int(epochs/10) != 0 and epoch % int(epochs/10) != 0:
         return
     feature_model = Model(self.model.input,
                           self.model.get_layer(
                               'encoder_%d' % (int(len(self.model.layers) / 2) - 1)).output)
     features = feature_model.predict(self.x)
     km = KMeans(n_clusters=len(np.unique(self.y)), n_init=20)
     y_pred = km.fit_predict(features)
     # print()
     print(' '*8 + '|==>  acc: %.4f,  nmi: %.4f  <==|'
           % (metrics.acc(self.y, y_pred), metrics.nmi(self.y, y_pred)))
Example No. 16
def _are_labels_equal(labels_new, labels_old):
    """
    Check if the old labels and new labels are equal. Therefore check the nmi for each subspace. If all are 1, labels
    have not changed.
    :param labels_new: new labels list
    :param labels_old: old labels list
    :return: True if labels for all subspaces are the same
    """
    if labels_new is None or labels_old is None:
        return False
    return all([
        nmi(labels_new[i], labels_old[i], average_method='arithmetic') == 1
        for i in range(len(labels_new))
    ])
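A quick demonstration: NMI is invariant to label permutations, so relabelled-but-identical partitions still count as unchanged (the arrays below are illustrative):

import numpy as np
from sklearn.metrics import normalized_mutual_info_score as nmi

old = [np.array([0, 0, 1, 1])]
new = [np.array([1, 1, 0, 0])]  # same partition, ids swapped
print(_are_labels_equal(new, old))   # True
print(_are_labels_equal(new, None))  # False

Note the exact == 1 comparison in the function: it works because the NMI of identical partitions is exactly 1, but an np.isclose check would be more defensive in general.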
Example No. 17
    def evaluate(self, metric):
        if self.true_labels == 0:
            print("Error: A true_labels.csv file is needed")
            return

        true_labels = list(self.true_labels.values())

        if metric == "nmi":
            print("NMI: %f" % nmi(true_labels, self.predicted_labels))
        elif metric == "purity":
            print("Purity: %f" % purity(true_labels, self.predicted_labels))
        elif metric == "confusion matrix":
            print("Confusion matrix:")
            print(confusion_matrix(true_labels, self.predicted_labels))
        elif metric == "all":
            print("NMI: %f" % nmi(true_labels, self.predicted_labels))
            print("Purity: %f" % purity(true_labels, self.predicted_labels))
            print("Confusion matrix:")
            print(confusion_matrix(true_labels, self.predicted_labels))
        else:
            print(
                "Error: This metric is not available. Choose among the following options: 'nmi', 'purity', 'confusion matrix', 'all'"
            )
def run_trial(X, labels, k):
    errors = '"'

    # Run k-means
    start = time()
    db = KMeans(n_clusters=k)
    pred_labels = db.fit_predict(X)
    elapsed = time() - start

    try:
        ari_score = ari(pred_labels, labels)
    except Exception as e:
        errors += str(e) + '; '
        ari_score = np.nan
    try:
        nmi_score = nmi(pred_labels, labels, average_method='arithmetic')
    except Exception as e:
        errors += str(e) + '; '
        nmi_score = np.nan
    try:
        ss_score = ss(X, pred_labels)
    except Exception as e:
        errors += str(e) + '; '
        ss_score = np.nan
    try:
        vrc_score = vrc(X, pred_labels)
    except Exception as e:
        errors += str(e) + '; '
        vrc_score = np.nan
    try:
        dbs_score = dbs(X, pred_labels)
    except Exception as e:
        errors += str(e) + '; '
        dbs_score = np.nan

    errors += '"'

    return [
        k, elapsed, ari_score, nmi_score, ss_score, vrc_score, dbs_score,
        errors
    ]
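A usage sketch on synthetic blobs, assuming the ari/nmi/ss/vrc/dbs aliases above point at the usual sklearn metrics:

from sklearn.datasets import make_blobs

X, labels = make_blobs(n_samples=500, centers=4, random_state=0)
k, elapsed, ari_s, nmi_s, ss_s, vrc_s, dbs_s, errs = run_trial(X, labels, 4)
print(k, round(elapsed, 3), round(ari_s, 3), round(nmi_s, 3))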
Example No. 19
def run_net(data, params):
    #
    # UNPACK DATA
    #

    x_train_unlabeled, y_train_unlabeled, x_val, y_val, x_test, y_test = data[
        'spectral']['train_and_test']

    inputs_vae = Input(shape=(params['img_dim'], params['img_dim'], 1),
                       name='inputs_vae')
    ConvAE = Conv.ConvAE(inputs_vae, params)
    ConvAE.vae.load_weights(
        '/home/stu2/Signal-1/Deep-Spectral-Clustering-using-Dual-Autoencoder-Network-master/src/applications/vae_mnist.h5'
    )

    lh = LearningHandler(lr=params['spec_lr'],
                         drop=params['spec_drop'],
                         lr_tensor=ConvAE.learning_rate,
                         patience=params['spec_patience'])

    lh.on_train_begin()

    losses_vae = np.empty((500, ))
    for i in range(100):
        # if i==0:
        x_val_y = ConvAE.vae.predict(x_val)[2]  # get y
        losses_vae[i] = ConvAE.train_vae(x_val, x_val_y, params['batch_size'])
        x_val_y = ConvAE.vae.predict(x_val)[2]
        y_sp = x_val_y.argmax(axis=1)
        print_accuracy(y_sp, y_val, params['n_clusters'])
        print("Epoch: {}, loss={:2f}".format(i, losses_vae[i]))

        if i > 1:
            if np.abs(losses_vae[i] - losses_vae[i - 1]) < 0.0001:
                print('STOPPING EARLY')
                break

    print("finished training")

    x_val_y = ConvAE.vae.predict(x_val)[2]
    # x_val_y = ConvAE.classfier.predict(x_val_lp)
    y_sp = x_val_y.argmax(axis=1)

    print_accuracy(y_sp, y_val, params['n_clusters'])
    from sklearn.metrics import normalized_mutual_info_score as nmi
    nmi_score1 = nmi(y_sp, y_val)
    print('NMI: ' + str(np.round(nmi_score1, 4)))
def cluster0(n_clusters=3):
    labels = ry1()
    true_labels = np.asarray(labels, dtype='int32')
    data = rx1()
    km = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    km.fit(data)

    km_means_labels = km.labels_
    km_means_cluster_centers = km.cluster_centers_
    km_means_labels_unique = np.unique(km_means_labels)

    colors_ = cycle(colors.cnames.keys())

    nmiv = nmi(true_labels, km_means_labels)
    print('nmi value:', nmiv)

    return nmiv  # compute the NMI score on the validation data
Example No. 21
    def evaluate(train_round_idx, ae_module, cluster_module):
        test_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(pt_data), batch_size=256)

        pred_labels = np.zeros(pt_data.shape[0], dtype=int)
        index = 0
        n_batches = 0
        for batch_data in test_loader:
            batch_data = batch_data[0].cuda()
            n_batches += 1
            batch_size = batch_data.shape[0]
            embedded_data, reconstructed_data = ae_module.forward(batch_data)
            labels = cluster_module.prediction_hard_np(embedded_data)
            pred_labels[index:index + batch_size] = labels
            index = index + batch_size
        nmi_value = nmi(gold_labels, pred_labels, average_method='arithmetic')
        acc_value = cluster_acc(gold_labels, pred_labels)[0]
        logger.info(
            f"{train_round_idx} Evaluation: NMI: {nmi_value} ACC: {acc_value}")
        return nmi_value, acc_value
Example No. 22
def run(dataset_name='seeds', n_clusters=3, mode='cpu'):
    X, Y = load_dataset(dataset_name)
    X = normalize_dataset(X)
    for i in range(1):
        C, V, m = sub_kmeans(X, n_clusters, mode)
        trans = V.T.real
        X_rotated = np.matmul(trans[None, :, :],
                              np.transpose(X[:, None, :], [0, 2, 1]))
        X_rotated = X_rotated.squeeze(-1).T
        nmi_value = nmi(Y, C)
        M = assign_markers(C)
        K = assign_colors(Y)
        print('')
        print('[i] Results')
        print('[*] m: %d' % m)
        print('[*] NMI: %.5f' % nmi_value)
        data_points = zip(X_rotated[0], X_rotated[1], K, M)
        for x_, y_, c_, m_ in data_points:
            plt.scatter(x_, y_, c=c_, marker=m_, s=3)
        plt.title('Seeds, m={:d}, NMI={:.3f}'.format(m, nmi_value))
        plt.savefig('{}.png'.format(dataset_name), dpi=300)
Example No. 23
def get_scores(x, y, n, k, dtr, dev):
    # One-hot encode both labelings, then form the k x k contingency
    # matrix t = tx^T ty (t[i, j] = samples with x == i and y == j).
    tx = tens0((n, k), dt=dtr, dev=dev)
    ty = tens0((n, k), dt=dtr, dev=dev)
    tx = tens_sel_set(tx, x, 1)
    ty = tens_sel_set(ty, y, 1)
    t = tx.t().matmul(ty)
    del tx, ty
    # Hungarian assignment on (max - t) maximizes the matched counts.
    tt = t.max() - t
    tt = tt.cpu().numpy()
    row, col = ass(tt)
    del tt
    t = t.cpu().numpy()
    t = t[row, col].sum()
    t = t.tolist() / n  # clustering accuracy under the best matching
    x = x.cpu().numpy()
    y = y.cpu().numpy()
    s = {
        'nmi': nmi(x, y, average_method='geometric'),
        'ari': ari(x, y),
        'acc': t,
    }
    return s
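The tensor code above is just building a k x k contingency matrix and solving an assignment problem on it; a hedged NumPy/SciPy equivalent of the accuracy term, assuming ass is scipy's linear_sum_assignment:

import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics.cluster import contingency_matrix

def cluster_accuracy(x, y):
    # Same quantity as `t` above: best one-to-one matching of clusters
    # to classes, divided by the number of samples.
    cm = contingency_matrix(x, y)
    row, col = linear_sum_assignment(cm.max() - cm)  # maximize matched counts
    return cm[row, col].sum() / cm.sum()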
Example No. 24
def run_net(data, params):
    #
    # UNPACK DATA
    #

    x_train, y_train, x_val, y_val, x_test, y_test = data['spectral'][
        'train_and_test']
    x_train_unlabeled, y_train_unlabeled, x_train_labeled, y_train_labeled = data[
        'spectral']['train_unlabeled_and_labeled']
    x_val_unlabeled, y_val_unlabeled, x_val_labeled, y_val_labeled = data[
        'spectral']['val_unlabeled_and_labeled']

    if 'siamese' in params['affinity']:
        pairs_train, dist_train, pairs_val, dist_val = data['siamese'][
            'train_and_test']

    x = np.concatenate((x_train, x_val, x_test), axis=0)
    y = np.concatenate((y_train, y_val, y_test), axis=0)

    if len(x_train_labeled):
        y_train_labeled_onehot = OneHotEncoder().fit_transform(
            y_train_labeled.reshape(-1, 1)).toarray()
    else:
        y_train_labeled_onehot = np.empty((0, len(np.unique(y))))

    #
    # SET UP INPUTS
    #

    # create true y placeholder (not used in unsupervised training)
    y_true = tf.placeholder(tf.float32,
                            shape=(None, params['n_clusters']),
                            name='y_true')

    batch_sizes = {
        'Unlabeled': params['batch_size'],
        'Labeled': params['batch_size'],
        'Orthonorm': params.get('batch_size_orthonorm', params['batch_size']),
    }

    input_shape = x.shape[1:]

    # spectralnet has three inputs -- they are defined here
    inputs = {
        'Unlabeled': Input(shape=input_shape, name='UnlabeledInput'),
        'Labeled': Input(shape=input_shape, name='LabeledInput'),
        'Orthonorm': Input(shape=input_shape, name='OrthonormInput'),
    }

    #
    # DEFINE AND TRAIN SIAMESE NET
    #

    # run only if we are using a siamese network
    if params['affinity'] == 'siamese':
        siamese_net = networks.SiameseNet(inputs, params['arch'],
                                          params.get('siam_reg'), y_true)

        history = siamese_net.train(pairs_train, dist_train, pairs_val,
                                    dist_val, params['siam_lr'],
                                    params['siam_drop'],
                                    params['siam_patience'], params['siam_ne'],
                                    params['siam_batch_size'])

    else:
        siamese_net = None

    #
    # DEFINE AND TRAIN SPECTRALNET
    #

    spectral_net = networks.SpectralNet(inputs, params['arch'],
                                        params.get('spec_reg'), y_true,
                                        y_train_labeled_onehot,
                                        params['n_clusters'],
                                        params['affinity'],
                                        params['scale_nbr'], params['n_nbrs'],
                                        batch_sizes, siamese_net, x_train,
                                        len(x_train_labeled))

    spectral_net.train(x_train_unlabeled, x_train_labeled, x_val_unlabeled,
                       params['spec_lr'], params['spec_drop'],
                       params['spec_patience'], params['spec_ne'])

    print("finished training")

    #
    # EVALUATE
    #

    # get final embeddings
    x_spectralnet = spectral_net.predict(x)

    # get accuracy and nmi
    kmeans_assignments, km = get_cluster_sols(x_spectralnet,
                                              ClusterClass=KMeans,
                                              n_clusters=params['n_clusters'],
                                              init_args={'n_init': 10})
    y_spectralnet, _ = get_y_preds(kmeans_assignments, y, params['n_clusters'])
    print_accuracy(kmeans_assignments, y, params['n_clusters'])
    from sklearn.metrics import normalized_mutual_info_score as nmi
    nmi_score = nmi(kmeans_assignments, y)
    print('NMI: ' + str(np.round(nmi_score, 3)))

    if params['generalization_metrics']:
        x_spectralnet_train = spectral_net.predict(x_train_unlabeled)
        x_spectralnet_test = spectral_net.predict(x_test)
        km_train = KMeans(
            n_clusters=params['n_clusters']).fit(x_spectralnet_train)
        from scipy.spatial.distance import cdist
        dist_mat = cdist(x_spectralnet_test, km_train.cluster_centers_)
        closest_cluster = np.argmin(dist_mat, axis=1)
        print_accuracy(closest_cluster, y_test, params['n_clusters'],
                       ' generalization')
        nmi_score = nmi(closest_cluster, y_test)
        print('generalization NMI: ' + str(np.round(nmi_score, 3)))

    return x_spectralnet, y_spectralnet
Example No. 25
        y[i] = 2

sparsity = 1 - (np.sum(T > 0) / np.prod(T.shape))
f, dt = CreateOutputFile("yelp", date = True)


output_path = f"./output/_yelp/" + dt[:10] + "_" + dt[11:13] + "." + dt[14:16] + "." + dt[17:19] + "/"
directory = os.path.dirname(output_path)
if not os.path.exists(directory):
    os.makedirs(directory)

model = CoClust(np.sum(T.shape) * 10, optimization_strategy = alg, path = output_path)
model.fit(T)

tau = model.final_tau_
nmi_x = nmi(y, model.x_, average_method='arithmetic')
ari_x = ari(y, model.x_)

f.write(f"{T.shape[0]},{T.shape[1]},{T.shape[2]},{len(set(y))},,,,{tau[0]},{tau[1]},{tau[2]},{nmi_x},,,{ari_x},,,{model._n_clusters[0]},{model._n_clusters[1]},{model._n_clusters[2]},{model.execution_time_},{sparsity},{alg}\n")
f.close()

gx = open(output_path + alg + "_assignments_"+ tensor + "_x.txt", 'w')
for i in range(T.shape[0]):
    gx.write(f"{i}\t{model._assignment[0][i]}\n")
gx.close()


gy = open(output_path + alg + "_assignments_"+ tensor + "_y.txt", 'w')
for i in range(T.shape[1]):
    gy.write(f"{i}\t{model._assignment[1][i]}\n")
gy.close()
Example No. 26
    #feat_lbp.append(mahotas.features.lbp(img[i],1,8))
feat_lbp = scale(np.array(feat_lbp))

# PCA on LBP features
#pca = PCA(n_components=20)
#feat_lbp = pca.fit_transform(feat_lbp)
#print "Variance Ratio: ",sum(pca.explained_variance_ratio_)

# Normalization of features
#feat_lbp = scale(feat_lbp)

# Save LBP features
file_pkl = open("face_lbp.pkl","wb")
pickle.dump(feat_lbp,file_pkl)
file_pkl.close()

# Compute affinity matrix
flag_sigma = 'global'
sigma_lbp, aff_lbp = compute_affinity(feat_lbp,flag_sigma=flag_sigma,\
        sigma=100.,nn=8)
print "kernel computation finished"

label_pred_identity = spectral_clustering(aff_lbp,n_clusters=20)
nmi_identity = nmi(label_pred_identity,img_identity)
print "NMI with identity: ",nmi_identity

label_pred_pose = spectral_clustering(aff_lbp,n_clusters=4)
nmi_pose = nmi(label_pred_pose,img_pose)
print "NMI with pose: ",nmi_pose

def main():
    path = './data/aucs_edgelist.txt'

    # Declare each layer's graph
    lunch = init_graph()
    facebook = init_graph()
    leisure = init_graph()
    work = init_graph()
    coauthor = init_graph()
    table = {
        'lunch': lunch,
        'facebook': facebook,
        'leisure': leisure,
        'work': work,
        'coauthor': coauthor,
    }
    truth, na = get_truth()

    # Load data into graph
    print(
        "--------------------------------------------------Load multilayers graph--------------------------------------------------"
    )
    with open(path) as f:
        for line in f:
            line = line.strip().split(',')
            name = line[2]
            if line[0] in na or line[1] in na:
                continue
            else:
                table[name].add_edge(line[0], line[1])
    for name, graph in table.items():
        print("\nGraph: {}".format(name))
        print("\tNumber of nodes: {}".format(nx.number_of_nodes(graph)))
        print("\tNumber of edges: {}".format(nx.number_of_edges(graph)))

    graph_list = [lunch, work, coauthor, leisure]
    node_list = list(lunch.nodes)

    # # Tuning k
    # print("--------------------------------------------------Perform k clusters selection--------------------------------------------------")
    # sse_list = []
    # range_k = np.arange(2, 15)
    # for k in range_k:
    #     labels, sse = SCML(graph_list, k, 0.5)
    #     score = silhouette_score(matrix, labels, random_state=42)
    #     print("Number of clusters k = {}".format(k),
    #           ",Silhouette Score = {}".format(round(score, 5)))
    #     sse_list.append(sse)

    # # Plot elbow method for k
    # plot_elbow(range_k, sse_list, "Selection of k")

    # Tuning alpha
    print(
        "--------------------------------------------------Perform alpha selection--------------------------------------------------"
    )
    range_a = np.arange(0.2, 1.1, 0.1)
    den = []
    nmi_list = []
    for alpha in range_a:
        labels = SCML(graph_list, 8, alpha)
        partitions = get_partition(labels, node_list)
        density = get_score(graph_list, partitions)
        den.append(density)
        print("\nAlpha = {}".format(round(alpha, 1)))
        print("\tDensity = {}".format(density))
        nmi_value = nmi(truth, labels)
        print("\tNMI = {}".format(nmi_value))
        nmi_list.append(nmi_value)

    # Plot elbow method for alpha
    plot_elbow(range_a, den, "Selection of alpha (Density)")
    plot_elbow(range_a, nmi_list, "Selection of alpha (NMI)")

    # Select the best model
    print(
        "--------------------------------------------------Multilayer Result---------------------------------------------------"
    )
    labels = SCML(graph_list, 8, 0.2)
    partitions = get_partition(labels, node_list)

    print("NMI: {}".format(nmi(truth, labels)))
    purity = purity_score(truth, labels)
    print("Purity: {}".format(purity))
    print(
        "--------------------------------------------------Single layer Result--------------------------------------------------"
    )
    for name, g in table.items():
        print("\nLayer: {}".format(name))
        labels = onelayer(g, 8)
        # print(labels)
        print("\tNMI: {}".format(nmi(truth, labels)))
        purity = purity_score(truth, labels)
        print("\tPurity: {}".format(purity))
Example No. 28
        x_init = np.zeros(nVars)
        options = {'verbose':3}
        
        data_mat = {'nInst':nInst, 'nVars':nVars, 'A':A, 'x':x, 'b':b, \
                'x_init':x_init}
        savemat("./Mark_Schmidt/minConf/minConf_SPG_input.mat",data_mat)
        (x, f, funEvals, projects) = minConf_SPG(funObj, x_init, funProj, options)
    elif flag_test == 8:
        options_default = {'verbose':2, 'numDiff':0, 'optTol':1e-5, 'progTol':1e-9,\
                'maxIter':500, 'suffDec':1e-4, 'interp':2, 'memory':10,\
                'useSpectral':1,'curvilinear':0,'feasibleInit':0,'testOpt':1,\
                'bbType':1}
        options = {'verbose':100, 'interp':10}
        options = setDefaultOptions(options, options_default)
    elif flag_test == 9:
        label = np.random.randint(0,10,100)
        alpha = np.array([.95, .85])
        beta = np.array([.7, .55])
        [num_ML, num_CL] = [100, 100]
        S = genConstraints(label, alpha, beta, num_ML, num_CL)

    elif flag_test == 10:
        tp = load_iris()
        [X, Y] = [scale(tp['data']), tp['target']]
        sim_mat = rbf_kernel(X)
        Y_pred = my_spectral_clustering(sim_mat, n_clusters=3)
        print(nmi(Y_pred, Y))
    else:
        pass

file_csv = open("data_pheno.csv","wb")
csvwriter = csv.writer(file_csv)
for i in range(data.shape[0]):
    csvwriter.writerow(list(data[i,:]))
file_csv.close()

label_pred_4 = np.loadtxt('label_pred_4.csv')
label_nbs_lf4 = np.loadtxt('label_nbs_lf4.csv')

nmi_1 = []
nmi_2 = []
nmi_3 = []
nmi_4 = []

for j in range(data.shape[1]):
    nmi_1.append(nmi(data[:,j],label_pred_4))
    nmi_2.append(nmi(data[:,j],label_nbs_lf4))
    nmi_3.append(nmi(data[:,j],patient_label))
    nmi_4.append(nmi(data[:,j],gold_stage))

index = np.arange(1,13)
bar_width = 0.2
labels = ['BD.FEV1','oxygen','ExacTrunc','BD.FEV.FVC','FracVol.950U',\
        'Lowest.15.','Emphysema','Neutrophils','Lymphocytes',\
        'Monocytes','Eosinophils','Basophils']

bar_1 = plt.bar(index,nmi_1,bar_width,color='b',label='Normalization+NMF')
bar_2 = plt.bar(index+bar_width,nmi_2,bar_width,color='r',label='NMF')
bar_3 = plt.bar(index+bar_width*2,nmi_3,bar_width,color='g',label='Case/Control')
bar_4 = plt.bar(index+bar_width*3,nmi_4,bar_width,color='y',label='Gold Stage')
plt.xlabel('Phenotype Features')
Example No. 30
    # Find U by eigendecomposition
    aff = beta[0]*aff_pca+beta[1]*aff_gabor
    eig_val,eig_vec = la.eig(aff)
    # Sort eigenvalues and eigenvectors
    idx = eig_val.argsort()[::-1]
    eig_val = eig_val[idx]
    eig_vec = eig_vec[:,idx]
    U = eig_vec[:,0:40]
    
    # Optimize beta
    gamma = matrix([np.trace(aff_pca.dot(U).dot(U.T)),\
            np.trace(aff_gabor.dot(U).dot(U.T))])
    G = matrix([[-1.0,0.],[0.,-1.0]])
    h = matrix([0.,0.])
    A = matrix([1.,1.],(1,2))
    b = matrix(1.0)
    res = qp(2*Q,-2*gamma,G,h,A,b)
    beta = res['x'].T
    residue_old = la.norm(aff-U_old.dot(U_old.T))
    residue = la.norm(aff-U.dot(U.T))
    n_iter = n_iter+1
    print(n_iter)
    print("beta: ", beta)
    print("-2*gamma: ", -2*gamma)
    print("Residue: ", residue)

clf = KMeans(n_clusters=K,init='random')
label_u = clf.fit_predict(U)
nmi_u = nmi(label_u,label_true)
print(nmi_u)
Example No. 31
        from keras.datasets import mnist
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x = np.concatenate((x_train, x_test))
        y = np.concatenate((y_train, y_test))
        x = x.reshape((x.shape[0], -1))
        x = np.divide(x, 50.)  # normalize as it does in DEC paper
        print('MNIST samples', x.shape)
        return x, y

    db = 'mnist'
    n_clusters = 10
    x, y = load_mnist()

    # define and train SAE model
    sae = SAE(dims=[x.shape[-1], 500, 500, 2000, 10])
    sae.fit(x=x, epochs=400)
    sae.autoencoders.save_weights('weights_%s.h5' % db)

    # extract features
    print('Finished training, extracting features using the trained SAE model')
    features = sae.extract_feature(x)

    print('performing k-means clustering on the extracted features')
    from sklearn.cluster import KMeans
    km = KMeans(n_clusters, n_init=20)
    y_pred = km.fit_predict(features)

    from sklearn.metrics import normalized_mutual_info_score as nmi
    print('K-means clustering result on extracted features: NMI =',
          nmi(y, y_pred))
Example No. 32
    n_eye = 2

    # Repeat KMeans 50 times to reduce randomness
    label_tmp = []
    inertia_tmp = []
    for i in range(50):
        clf = KMeans(n_clusters=n_pose,init='random')
        clf.fit(U)
        label_tmp.append(list(clf.labels_))
        inertia_tmp.append(clf.inertia_)

    idx_tmp = inertia_tmp.index(min(inertia_tmp))
    label_u = label_tmp[idx_tmp]
    inertia_vec.append(min(inertia_tmp))
    score_vec.append(silhouette_score(U,np.array(label_u)))
    nmi_identity.append(nmi(label_u,img_identity))
    nmi_pose.append(nmi(label_u,img_pose))

    n_show = n_pose
    # Show mean image
    img_avg = np.zeros((n_show,img.shape[1],img.shape[2]))
    cnt_avg = np.zeros((n_show,1))
    for i in range(len(label_u)):
        img_avg[label_u[i]] += img[i]
        cnt_avg[label_u[i]] += 1.

    for i in range(n_show):
        img_avg[i] = img_avg[i]/cnt_avg[i]
        #plt.imshow(img_avg[i],cmap=cm.Greys_r)
        #plt.show()
Example No. 33
        identity,pose,expression,eye],file_pkl)
file_pkl.close()

# Normalization of each image
for i in range(img.shape[0]):
    img[i] = (img[i]-img[i].min())*1./(img[i].max()-img[i].min())

img = img.reshape(img.shape[0],img.shape[1]*img.shape[2])
img = scale(img)

# 'global','local','manual'
flag_sigma = 'global'

# Compute similarity matrix
sigma,aff_img = compute_affinity(img,flag_sigma=flag_sigma,sigma=100.,nn=7)
if flag_sigma == 'local':
    sigma_init = sum(sigma**2)/len(sigma)
    print "Average Sigma(local): ",sigma_init

K = 20
# Construct existing solution Y
Y = np.zeros((img.shape[0],20))
for i in range(img.shape[0]):
    Y[i,img_identity[i]] = 1
val_lambda = 1.2
arr_tmp = val_lambda*Y.dot(Y.T)

label_pred_identity = spectral_clustering(aff_img,n_clusters=K)
nmi_identity = nmi(label_pred_identity,img_identity)
print(nmi_identity)
Example No. 34
feat_lbp = pickle.load(file_lbp)
file_lbp.close()

# Compute similarity matrix for FFT and Gabor
flag_sigma = 'global'
sigma_fft, aff_fft = compute_affinity(feat_fft,flag_sigma=flag_sigma)
sigma_gabor, aff_gabor = compute_affinity(feat_gabor,flag_sigma=flag_sigma)
sigma_lbp, aff_lbp = compute_affinity(feat_lbp,flag_sigma=flag_sigma)
print "kernel computation finished"

# Spectral Clustering using FFT
K = 4
label_pred_fft = spectral_clustering(aff_fft,n_clusters=K)
label_pred_gabor = spectral_clustering(aff_gabor,n_clusters=K)

nmi_fft_identity = nmi(label_pred_fft,img_identity)
nmi_gabor_identity = nmi(label_pred_gabor,img_identity)
print "nmi_fft_identity: ", nmi_fft_identity
print "nmi_gabor_identity: ",nmi_gabor_identity

for alpha in np.arange(0.1,1.0,0.1):
    aff_add = alpha*aff_fft+(1-alpha)*aff_gabor
    label_pred_add = spectral_clustering(aff_add,n_clusters=K)
    nmi_add_identity = nmi(label_pred_add,img_identity)
    print((alpha, nmi_add_identity))

# Weighted summation
M = 2
Q = matrix([[np.trace(aff_fft.dot(aff_fft)),\
             np.trace(aff_fft.dot(aff_gabor))],\
            [np.trace(aff_gabor.dot(aff_fft)),\
Example No. 35
def fPredict(test_ref, test_art, dParam, dHyper):
    weights_file = dParam['sOutPath'] + os.sep + '{}.h5'.format(
        dHyper['bestModel'])

    patchSize = dParam['patchSize']

    vae = createModel(patchSize, dHyper)

    vae.compile(optimizer='adam', loss=None)

    vae.load_weights(weights_file)

    test_ref = np.expand_dims(test_ref, axis=1)
    test_art = np.expand_dims(test_art, axis=1)

    predict_ref, predict_art = vae.predict([test_ref, test_art],
                                           dParam['batchSize'][0],
                                           verbose=1)

    test_ref = np.squeeze(test_ref, axis=1)
    test_art = np.squeeze(test_art, axis=1)
    predict_art = np.squeeze(predict_art, axis=1)

    if dHyper['unpatch']:
        test_ref = fRigidUnpatchingCorrection2D(dHyper['actualSize'], test_ref,
                                                dParam['patchOverlap'])
        test_art = fRigidUnpatchingCorrection2D(dHyper['actualSize'], test_art,
                                                dParam['patchOverlap'])
        predict_art = fRigidUnpatchingCorrection2D(dHyper['actualSize'],
                                                   predict_art,
                                                   dParam['patchOverlap'],
                                                   'average')

        # pre TV processing
        test_art_tv_1 = denoise_tv_chambolle(test_art, weight=1)
        test_art_tv_3 = denoise_tv_chambolle(test_art, weight=3)
        test_art_tv_5 = denoise_tv_chambolle(test_art, weight=5)

        if dHyper['evaluate']:
            if dParam['lSaveIndividual']:
                fig = plt.figure()
                plt.gray()
                label = 'NRMSE: {:.2f}, SSIM: {:.3f}, NMI: {:.3f}'
                for i in range(len(test_ref)):
                    ax = imshow(test_ref[i])
                    plt.xticks([])
                    plt.yticks([])
                    ax.set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_ref[i]),
                            ssim(test_ref[i],
                                 test_ref[i],
                                 data_range=(test_ref[i].max() -
                                             test_ref[i].min())),
                            nmi(test_ref[i].flatten(), test_ref[i].flatten())))
                    ax.set_title('reference image')
                    if dParam['lSave']:
                        plt.savefig(dParam['sOutPath'] + os.sep + 'result' +
                                    os.sep + 'reference_' + str(i) + '.png')
                    else:
                        plt.show()

                    ax = imshow(test_art[i])
                    plt.xticks([])
                    plt.yticks([])
                    ax.set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_art[i]),
                            ssim(test_ref[i],
                                 test_art[i],
                                 data_range=(test_art[i].max() -
                                             test_art[i].min())),
                            nmi(test_ref[i].flatten(), test_art[i].flatten())))
                    ax.set_title('motion-affected image')
                    if dParam['lSave']:
                        plt.savefig(dParam['sOutPath'] + os.sep + 'result' +
                                    os.sep + 'art_' + str(i) + '.png')
                    else:
                        plt.show()

                    ax = imshow(predict_art[i])
                    plt.xticks([])
                    plt.yticks([])
                    ax.set_xlabel(
                        label.format(
                            nrmse(test_ref[i], predict_art[i]),
                            ssim(test_ref[i],
                                 predict_art[i],
                                 data_range=(predict_art[i].max() -
                                             predict_art[i].min())),
                            nmi(test_ref[i].flatten(),
                                predict_art[i].flatten())))
                    ax.set_title('reconstructed image')
                    if dParam['lSave']:
                        plt.savefig(dParam['sOutPath'] + os.sep + 'result' +
                                    os.sep + 'recon_' + str(i) + '.png')
                    else:
                        plt.show()

                    ax = imshow(test_art_tv_1[i])
                    plt.xticks([])
                    plt.yticks([])
                    ax.set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_art_tv_1[i]),
                            ssim(test_ref[i],
                                 test_art_tv_1[i],
                                 data_range=(test_art_tv_1[i].max() -
                                             test_art_tv_1[i].min())),
                            nmi(test_ref[i].flatten(),
                                test_art_tv_1[i].flatten())))
                    ax.set_title('TV weight 1')
                    if dParam['lSave']:
                        plt.savefig(dParam['sOutPath'] + os.sep + 'result' +
                                    os.sep + 'tv1_' + str(i) + '.png')
                    else:
                        plt.show()

                    ax = imshow(test_art_tv_3[i])
                    plt.xticks([])
                    plt.yticks([])
                    ax.set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_art_tv_3[i]),
                            ssim(test_ref[i],
                                 test_art_tv_3[i],
                                 data_range=(test_art_tv_3[i].max() -
                                             test_art_tv_3[i].min())),
                            nmi(test_ref[i].flatten(),
                                test_art_tv_3[i].flatten())))
                    ax.set_title('TV weight 3')
                    if dParam['lSave']:
                        plt.savefig(dParam['sOutPath'] + os.sep + 'result' +
                                    os.sep + 'tv3_' + str(i) + '.png')
                    else:
                        plt.show()

                    ax = imshow(test_art_tv_5[i])
                    plt.xticks([])
                    plt.yticks([])
                    ax.set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_art_tv_5[i]),
                            ssim(test_ref[i],
                                 test_art_tv_5[i],
                                 data_range=(test_art_tv_5[i].max() -
                                             test_art_tv_5[i].min())),
                            nmi(test_ref[i].flatten(),
                                test_art_tv_5[i].flatten())))
                    ax.set_title('TV weight 5')
                    if dParam['lSave']:
                        plt.savefig(dParam['sOutPath'] + os.sep + 'result' +
                                    os.sep + 'tv5_' + str(i) + '.png')
                    else:
                        plt.show()

            else:
                fig, axes = plt.subplots(nrows=2,
                                         ncols=3,
                                         figsize=(15, 10),
                                         sharex=True,
                                         sharey=True)
                # fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 15), sharex=True, sharey=True)
                ax = axes.ravel()
                plt.gray()
                label = 'NRMSE: {:.2f}, SSIM: {:.3f}, NMI: {:.3f}'

                for i in range(len(test_ref)):
                    # orignal reconstructed images
                    ax[0].imshow(test_ref[i])
                    ax[0].set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_ref[i]),
                            ssim(test_ref[i],
                                 test_ref[i],
                                 data_range=(test_ref[i].max() -
                                             test_ref[i].min())),
                            nmi(test_ref[i].flatten(), test_ref[i].flatten())))
                    ax[0].set_title('reference image')

                    ax[1].imshow(test_art[i])
                    ax[1].set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_art[i]),
                            ssim(test_ref[i],
                                 test_art[i],
                                 data_range=(test_art[i].max() -
                                             test_art[i].min())),
                            nmi(test_ref[i].flatten(), test_art[i].flatten())))
                    ax[1].set_title('motion-affected image')

                    ax[2].imshow(predict_art[i])
                    ax[2].set_xlabel(
                        label.format(
                            nrmse(test_ref[i], predict_art[i]),
                            ssim(test_ref[i],
                                 predict_art[i],
                                 data_range=(predict_art[i].max() -
                                             predict_art[i].min())),
                            nmi(test_ref[i].flatten(),
                                predict_art[i].flatten())))
                    ax[2].set_title('reconstructed image')

                    # TV denoiser
                    ax[3].imshow(test_art_tv_1[i])
                    ax[3].set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_art_tv_1[i]),
                            ssim(test_ref[i],
                                 test_art_tv_1[i],
                                 data_range=(test_art_tv_1[i].max() -
                                             test_art_tv_1[i].min())),
                            nmi(test_ref[i].flatten(),
                                test_art_tv_1[i].flatten())))
                    ax[3].set_title('TV weight 1')

                    ax[4].imshow(test_art_tv_3[i])
                    ax[4].set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_art_tv_3[i]),
                            ssim(test_ref[i],
                                 test_art_tv_3[i],
                                 data_range=(test_art_tv_3[i].max() -
                                             test_art_tv_3[i].min())),
                            nmi(test_ref[i].flatten(),
                                test_art_tv_3[i].flatten())))
                    ax[4].set_title('TV weight 3')

                    ax[5].imshow(test_art_tv_5[i])
                    ax[5].set_xlabel(
                        label.format(
                            nrmse(test_ref[i], test_art_tv_5[i]),
                            ssim(test_ref[i],
                                 test_art_tv_5[i],
                                 data_range=(test_art_tv_5[i].max() -
                                             test_art_tv_5[i].min())),
                            nmi(test_ref[i].flatten(),
                                test_art_tv_5[i].flatten())))
                    ax[5].set_title('TV weight 5')

                    if dParam['lSave']:
                        plt.savefig(dParam['sOutPath'] + os.sep + 'result' +
                                    os.sep + str(i) + '.png')
                    else:
                        plt.show()

        else:
            plt.figure()
            plt.gray()
            for i in range(predict_art.shape[0]):
                plt.imshow(predict_art[i])
                if dParam['lSave']:
                    plt.savefig(dParam['sOutPath'] + os.sep + 'result' +
                                os.sep + str(i) + '.png',
                                dpi=300)
                else:
                    plt.show()
    else:
        nPatch = predict_art.shape[0]

        for i in range(nPatch // 4):
            fig, axes = plt.subplots(nrows=4, ncols=2)
            plt.gray()

            cols_title = ['original_art', 'predicted_art']

            for ax, col in zip(axes[0], cols_title):
                ax.set_title(col)

            for j in range(4):
                axes[j, 0].imshow(test_art[4 * i + j])
                axes[j, 1].imshow(predict_art[4 * i + j])

            if dParam['lSave']:
                plt.savefig(dParam['sOutPath'] + os.sep + 'result' + os.sep +
                            str(i) + '.png')
            else:
                plt.show()
Example No. 36
        return feature_model.predict(x, batch_size=self.batch_size)


if __name__ == "__main__":
    """
    An example for how to use SAE model on MNIST dataset. In terminal run
            python3 SAE.py
    to see the result.
    """
    import numpy as np
    from load_mnist import load_mnist
    x,y=load_mnist(sample_size=10000,seed=0)
    db = 'mnist'
    n_clusters = 10
    # define and train SAE model
    sae = SAE(dims=[x.shape[-1], 64,32])
    sae.fit(x=x, epochs=400)
    sae.autoencoders.save_weights('weights_%s.h5' % db)

    # extract features
    print ('Finished training, extracting features using the trained SAE model')
    features = sae.extract_feature(x)

    print ('performing k-means clustering on the extracted features')
    from sklearn.cluster import KMeans
    km = KMeans(n_clusters, n_init=20)
    y_pred = km.fit_predict(features)

    from sklearn.metrics import normalized_mutual_info_score as nmi
    print ('K-means clustering result on extracted features: NMI =', nmi(y, y_pred))
Example No. 37
            print "Optimization fails",v_lambda_idx

        # Repeat KMeans 50 times to reduce randomness
        label_tmp = []
        inertia_tmp = []
        for i in range(50):
            clf = KMeans(n_clusters=dim_q,init='random')
            clf.fit(U)
            label_tmp.append(list(clf.labels_))
            inertia_tmp.append(clf.inertia_)
    
        idx_tmp = inertia_tmp.index(min(inertia_tmp))
        label_u = label_tmp[idx_tmp]
        inertia_vec.append(min(inertia_tmp))
        score_vec.append(silhouette_score(U,np.array(label_u)))
        nmi_e.append(nmi(label_e,label_u))

beta_vec = np.array(beta_vec)

# Plot the result
plt.figure(0)
plt.plot(v_lambda_range,nmi_e,'r',label='nmi_e')
plt.xlabel("lambda(tradeoff between clustering quality and novelty)")
plt.ylabel("NMI value")
plt.legend(loc='upper left')

plt.figure(1)
plt.plot(v_lambda_range,inertia_vec)
plt.xlabel("lambda")
plt.ylabel("Inertia Value")
plt.figure(2)
Example No. 38
def run_net(data, params):
  """run the network with the parameters."""
  #
  # UNPACK DATA
  #

  x_train, y_train, x_val, y_val, x_test, y_test = data['cnc']['train_and_test']
  x_train_unlabeled, _, x_train_labeled, y_train_labeled = data['cnc'][
      'train_unlabeled_and_labeled']
  x_val_unlabeled, _, _, _ = data['cnc']['val_unlabeled_and_labeled']

  if 'siamese' in params['affinity']:
    pairs_train, dist_train, pairs_val, dist_val = data['siamese'][
        'train_and_test']

  x = np.concatenate((x_train, x_val, x_test), axis=0)
  y = np.concatenate((y_train, y_val, y_test), axis=0)

  if len(x_train_labeled):
    y_train_labeled_onehot = OneHotEncoder().fit_transform(
        y_train_labeled.reshape(-1, 1)).toarray()
  else:
    y_train_labeled_onehot = np.empty((0, len(np.unique(y))))

  #
  # SET UP INPUTS
  #

  # create true y placeholder (not used in unsupervised training)
  y_true = tf.placeholder(
      tf.float32, shape=(None, params['n_clusters']), name='y_true')

  batch_sizes = {
      'Unlabeled': params['batch_size'],
      'Labeled': params['batch_size']
  }

  input_shape = x.shape[1:]

  # inputs to CNC
  inputs = {
      'Unlabeled': Input(shape=input_shape, name='UnlabeledInput'),
      'Labeled': Input(shape=input_shape, name='LabeledInput'),
  }

  #
  # DEFINE AND TRAIN SIAMESE NET
  # http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf.

  # DEFINE AND TRAIN Siamese NET
  if params['affinity'] == 'siamese':
    siamese_net = networks.SiameseNet(inputs, params['siam_arch'],
                                      params.get('siam_reg'),
                                      params['main_path'], y_true)

    siamese_net.train(pairs_train, dist_train, pairs_val, dist_val,
                      params['siam_lr'], params['siam_drop'],
                      params['siam_patience'], params['siam_ne'],
                      params['siam_batch_size'], params['dset'])

  else:
    siamese_net = None

  #
  # DEFINE AND TRAIN CNC NET
  #
  cnc_net = networks.CncNet(inputs, params['cnc_arch'], params.get('cnc_reg'),
                            y_true, y_train_labeled_onehot,
                            params['n_clusters'], params['affinity'],
                            params['scale_nbr'], params['n_nbrs'], batch_sizes,
                            params['result_path'], params['dset'], siamese_net,
                            x_train, params['cnc_lr'], params['cnc_tau'],
                            params['bal_reg'])

  cnc_net.train(x_train_unlabeled, x_train_labeled, x_val_unlabeled,
                params['cnc_drop'], params['cnc_patience'], params['min_tem'],
                params['cnc_epochs'])

  #
  # EVALUATE
  #

  x_cncnet = cnc_net.predict(x)
  prediction = np.argmax(x_cncnet, 1)
  accuracy_all = print_accuracy(prediction, y, params['n_clusters'])
  nmi_score_all = nmi(prediction, y)
  print('NMI: {0}'.format(np.round(nmi_score_all, 3)))

  if params['generalization_metrics']:
    x_cncnet_train = cnc_net.predict(x_train_unlabeled)
    x_cncnet_test = cnc_net.predict(x_test)

    prediction_train = np.argmax(x_cncnet_train, 1)
    accuracy_train = print_accuracy(prediction_train, y_train,
                                    params['n_clusters'])
    nmi_score_train = nmi(prediction_train, y_train)
    print('TRAIN NMI: {0}'.format(np.round(nmi_score_train, 3)))

    prediction_test = np.argmax(x_cncnet_test, 1)
    accuracy_test = print_accuracy(prediction_test, y_test, params['n_clusters'])
    nmi_score_test = nmi(prediction_test, y_test)
    print('TEST NMI: {0}'.format(np.round(nmi_score_test, 3)))
    with gfile.Open(params['result_path'] + 'results', 'w') as f:
      f.write(accuracy_all + ' ' + accuracy_train + ' ' + accuracy_test + '\n')
      f.write(
          str(np.round(nmi_score_all, 3)) + ' ' +
          str(np.round(nmi_score_train, 3)) + ' ' +
          str(np.round(nmi_score_test, 3)) + '\n')

  else:
    with gfile.Open(params['result_path'] + 'results', 'w') as f:
      f.write(accuracy_all + ' ' + str(np.round(nmi_score_all, 3)) + '\n')
Example No. 39
sigma_fac, aff_fac = compute_affinity(data_fac,flag_sigma=flag_sigma,\
        sigma=422.6228,nn=8)
print "kernel computing finished"
if flag_sigma == 'local':
    sigma_fou_init = sum(sigma_fou**2)/len(sigma_fou)
    sigma_fac_init = sum(sigma_fac**2)/len(sigma_fac)

K = 10
label_true = []
for i in range(K):
    for j in range(200):
        label_true.append(i)

# Spectral Clustering: Fourier coefficient
label_fou = spectral_clustering(aff_fou,n_clusters=K)
nmi_fou = nmi(label_fou,label_true)
print "NMI(Source 1)",nmi_fou

# SC: Autocorrelation Profile
label_fac = spectral_clustering(aff_fac,n_clusters=K)
nmi_fac = nmi(label_fac,label_true)
print "NMI(Source 2)",nmi_fac

# kernel addition
for alpha in np.arange(0.1,1.0,0.1):
    aff_add = alpha*aff_fou+(1-alpha)*aff_fac
    label_add = spectral_clustering(aff_add,n_clusters=K)
    nmi_add = nmi(label_add,label_true)
    print "NMI(a*source_1+(1-a)*source_2)",(alpha,nmi_add)

# Parameter settings