Example #1
    def layerwise_pretrain(self,
                           X,
                           batch_size,
                           n_iter,
                           optimizer,
                           l_rate,
                           decay,
                           lr_scheduler=None):
        def l2_norm(label, pred):
            return np.mean(np.square(label - pred)) / 2.0

        solver = Solver(optimizer,
                        momentum=0.9,
                        wd=decay,
                        learning_rate=l_rate,
                        lr_scheduler=lr_scheduler)
        solver.set_metric(mx.metric.CustomMetric(l2_norm))
        solver.set_monitor(Monitor(1000))
        data_iter = mx.io.NDArrayIter([X],
                                      batch_size=batch_size,
                                      shuffle=False,
                                      last_batch_handle='roll_over')
        for i in range(self.N):
            if i == 0:
                data_iter_i = data_iter
            else:
                X_i = list(model.extract_feature(self.internals[i - 1], self.args,
                                                 ['data'], data_iter, X.shape[0],
                                                 self.xpu).values())[0]
                data_iter_i = mx.io.NDArrayIter([X_i],
                                                batch_size=batch_size,
                                                last_batch_handle='roll_over')
            solver.solve(self.xpu, self.stacks[i], self.args, self.args_grad,
                         ['data'], data_iter_i, 0, n_iter, self.args_mult)
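For context, a minimal driver sketch for the method above; everything in it is an assumption for illustration (the AutoEncoderModel constructor, the layer sizes, and the random stand-in data are not part of these examples):

import mxnet as mx
import numpy as np

# Hypothetical invocation of layerwise_pretrain; AutoEncoderModel and its
# constructor signature are assumed, X is random stand-in data.
X = np.random.rand(1000, 784).astype(np.float32)
ae = AutoEncoderModel(mx.cpu(), [X.shape[1], 500, 500, 2000, 10])  # assumed class
ae.layerwise_pretrain(X, batch_size=256, n_iter=50000,
                      optimizer='sgd', l_rate=0.1, decay=0.0)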
Example #2
    def finetune(self,
                 X,
                 batch_size,
                 n_iter,
                 optimizer,
                 l_rate,
                 decay,
                 lr_scheduler=None,
                 print_every=1000):
        def l2_norm(label, pred):
            return np.mean(np.square(label - pred)) / 2.0

        solver = Solver(optimizer,
                        momentum=0.9,
                        wd=decay,
                        learning_rate=l_rate,
                        lr_scheduler=lr_scheduler)
        solver.set_metric(mx.metric.CustomMetric(l2_norm))
        solver.set_monitor(Monitor(print_every))
        data_iter = mx.io.NDArrayIter({'data': X},
                                      batch_size=batch_size,
                                      shuffle=True,
                                      last_batch_handle='roll_over')
        logging.info('Fine tuning...')
        solver.solve(self.xpu, self.loss, self.args, self.args_grad, self.auxs,
                     data_iter, 0, n_iter, {}, False)
Example #3
    def cluster(self, X, y=None, update_interval=None):
        N = X.shape[0]
        if not update_interval:
            update_interval = N
        batch_size = 256
        test_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=False,
                                      last_batch_handle='pad')
        args = {k: mx.nd.array(v.asnumpy(), ctx=self.xpu) for k, v in self.args.items()}
        z = list(model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values())[0]
        kmeans = KMeans(self.num_centers, n_init=20)
        kmeans.fit(z)
        args['dec_mu'][:] = kmeans.cluster_centers_
        solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)

        def ce(label, pred):
            return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0]

        solver.set_metric(mx.metric.CustomMetric(ce))

        label_buff = np.zeros((X.shape[0], self.num_centers))
        train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff}, batch_size=batch_size,
                                       shuffle=False, last_batch_handle='roll_over')
        self.y_pred = np.zeros((X.shape[0]))

        def refresh(i):
            if i % update_interval == 0:
                z = list(model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values())[0]
                p = np.zeros((z.shape[0], self.num_centers))
                self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
                y_pred = p.argmax(axis=1)
                print(np.std(np.bincount(y_pred)), np.bincount(y_pred))
                if y is not None:
                    # label statistics only make sense when ground truth is given
                    print(np.std(np.bincount(y.astype(int))), np.bincount(y.astype(int)))
                    print(cluster_acc(y_pred, y)[0])
                weight = 1.0 / p.sum(axis=0)
                weight *= self.num_centers / weight.sum()
                p = (p ** 2) * weight
                train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
                print(np.sum(y_pred != self.y_pred), 0.001 * y_pred.shape[0])
                if np.sum(y_pred != self.y_pred) < 0.001 * y_pred.shape[0]:
                    self.y_pred = y_pred
                    return True
                self.y_pred = y_pred

        solver.set_iter_start_callback(refresh)
        solver.set_monitor(Monitor(50))

        solver.solve(self.xpu, self.loss, args, self.args_grad, None,
                     train_iter, 0, 1000000000, {}, False)
        self.end_args = args
        if y is not None:
            return cluster_acc(self.y_pred, y)[0]
        else:
            return -1
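The refresh callbacks in these examples compute DEC's auxiliary target distribution inline: weight = 1.0 / p.sum(axis=0), rescale, square p, then row-normalize. The rescaling by num_centers cancels after row normalization, so the computation reduces to the standard DEC target p_ij = (q_ij**2 / f_j) / sum_j'(q_ij'**2 / f_j'), with f_j the soft cluster frequency. A standalone sketch of the same math (the function name is mine):

import numpy as np

def target_distribution(q):
    # f_j = q.sum(axis=0) is the soft frequency of cluster j;
    # square the soft assignments, divide by f_j, renormalize rows.
    weight = q ** 2 / q.sum(axis=0)
    return (weight.T / weight.sum(axis=1)).T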
Example #4
    def finetune(self,
                 X,
                 R,
                 V,
                 lambda_v_rt,
                 lambda_u,
                 lambda_v,
                 dir_save,
                 batch_size,
                 n_iter,
                 optimizer,
                 l_rate,
                 decay,
                 lr_scheduler=None):
        def l2_norm(label, pred):
            # debug output: inspect the metric inputs before computing the loss
            print(type(label))
            print(type(pred))
            print(np.shape(label))
            print(np.shape(pred))
            return np.mean(np.square(label - pred)) / 2.0

        solver = Solver(optimizer,
                        momentum=0.9,
                        wd=decay,
                        learning_rate=l_rate,
                        lr_scheduler=lr_scheduler)
        solver.set_metric(mx.metric.CustomMetric(l2_norm))
        solver.set_monitor(Monitor(1000))
        data_iter = mx.io.NDArrayIter(
            {
                'data': X,
                'V': V,
                'lambda_v_rt': lambda_v_rt
            },
            batch_size=batch_size,
            shuffle=False,
            last_batch_handle='pad')
        logging.info('Fine tuning...')
        # self.loss is the net
        U, V, theta, BCD_loss = solver.solve(X, R, V, lambda_v_rt, lambda_u,
                                             lambda_v, dir_save, batch_size,
                                             self.xpu, self.loss, self.args,
                                             self.args_grad, self.auxs,
                                             data_iter, 0, n_iter, {}, False)
        return U, V, theta, BCD_loss
Example #5
 def layerwise_pretrain(self, X, batch_size, n_iter, optimizer, l_rate, decay,
                        lr_scheduler=None, print_every=1000):
     def l2_norm(label, pred):
         return np.mean(np.square(label-pred))/2.0
     solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate,
                     lr_scheduler=lr_scheduler)
     solver.set_metric(mx.metric.CustomMetric(l2_norm))
     solver.set_monitor(Monitor(print_every))
     data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True,
                                   last_batch_handle='roll_over')
     for i in range(self.N):
         if i == 0:
             data_iter_i = data_iter
         else:
             X_i = list(model.extract_feature(
                 self.internals[i-1], self.args, self.auxs, data_iter, X.shape[0],
                 self.xpu).values())[0]
             data_iter_i = mx.io.NDArrayIter({'data': X_i}, batch_size=batch_size,
                                             last_batch_handle='roll_over')
         logging.info('Pre-training layer %d...', i)
         solver.solve(self.xpu, self.stacks[i], self.args, self.args_grad, self.auxs,
                      data_iter_i, 0, n_iter, {}, False)
Example #6
 def layerwise_pretrain(self, X, b_size, total_iter, opti, l_rate, decay, scheduler_lr=None):
     def l2_norm(label, pred):
         return np.mean(np.square(label-pred))/2.0
     solver = Solver(opti, momentum=0.9, wd=decay, learning_rate=l_rate, lr_scheduler=scheduler_lr)
     # track the L2 reconstruction error during training
     solver.set_metric(mx.metric.CustomMetric(l2_norm))
     # log training statistics every 1000 iterations
     solver.set_monitor(Monitor(1000))
     # iterator over the raw input data
     data_iter = mx.io.NDArrayIter({'data': X}, batch_size=b_size, shuffle=True,
                                   last_batch_handle='roll_over')
     # greedy layer-wise pre-training over the N stacked autoencoders
     for i in range(self.N):
         if i == 0:
             data_iter_i = data_iter
         else:
             X_i = list(model.extract_feature(self.internals[i-1], self.args, self.auxs,
                                              data_iter, X.shape[0], self.xpu).values())[0]
             data_iter_i = mx.io.NDArrayIter({'data': X_i}, batch_size=b_size,
                                             last_batch_handle='roll_over')
         logging.info('Pre-training layer %d...', i)
         solver.solve(self.xpu, self.stacks[i], self.args, self.args_grad, self.auxs, data_iter_i,
                      0, total_iter, {}, False)
Example #7
    def finetune(self,
                 X,
                 batch_size,
                 n_iter,
                 optimizer,
                 l_rate,
                 decay,
                 lr_scheduler=None):
        def l2_norm(label, pred):
            return np.mean(np.square(label - pred)) / 2.0

        solver = Solver(optimizer,
                        momentum=0.9,
                        wd=decay,
                        learning_rate=l_rate,
                        lr_scheduler=lr_scheduler)
        solver.set_metric(mx.metric.CustomMetric(l2_norm))
        solver.set_monitor(Monitor(1000))
        data_iter = mx.io.NDArrayIter([X],
                                      batch_size=batch_size,
                                      shuffle=False,
                                      last_batch_handle='roll_over')
        solver.solve(self.xpu, self.loss, self.args, self.args_grad, ['data'],
                     data_iter, 0, n_iter, self.args_mult)
Example #8
    def cluster_varying_mu(self,
                           X,
                           y_dec,
                           roi_labels,
                           classes,
                           batch_size,
                           save_to,
                           labeltype,
                           update_interval=None):
        # y = y_dec
        N = X.shape[0]
        self.best_args['update_interval'] = update_interval
        self.best_args['y_dec'] = y_dec
        self.best_args['roi_labels'] = roi_labels
        self.best_args['classes'] = classes
        self.best_args['batch_size'] = batch_size

        # selecting batch size
        # [42*t for t in range(42)]  will produce 16 train epochs
        # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630]
        test_iter = mx.io.NDArrayIter({'data': X},
                                      batch_size=N,
                                      shuffle=False,
                                      last_batch_handle='pad')
        args = {
            k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()
        }
        ## embedded point zi
        z = list(model.extract_feature(self.feature, args, None, test_iter, N,
                                       self.xpu).values())[0]

        # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
        self.perplexity = 25
        self.learning_rate = 200
        # reconstruct wordy labels list(Y)==named_y
        named_y = [classes[kc] for kc in y_dec]
        self.best_args['named_y'] = named_y

        # To initialize the cluster centers, we pass the data through
        # the initialized DNN to get embedded data points and then
        # perform standard k-means clustering in the feature space Z
        # to obtain k initial centroids {mu j}
        kmeans = KMeans(self.best_args['num_centers'], n_init=20)
        kmeans.fit(z)
        args['dec_mu'][:] = kmeans.cluster_centers_

        ### KL DIVERGENCE MINIMIZATION. eq(2)
        # our model is trained by matching the soft assignment to the target distribution.
        # To this end, we define our objective as a KL divergence loss between
        # the soft assignments qi (pred) and the auxiliary distribution pi (label)
        solver = Solver(
            'sgd', momentum=0.9, wd=0.0, learning_rate=0.01
        )  # alternative: lr_scheduler=mx.misc.FactorScheduler(20*update_interval, 0.4)

        def ce(label, pred):
            return np.sum(label * np.log(label /
                                         (pred + 0.000001))) / label.shape[0]

        solver.set_metric(mx.metric.CustomMetric(ce))

        label_buff = np.zeros((X.shape[0], self.best_args['num_centers']))
        train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff},
                                       batch_size=self.best_args['batch_size'],
                                       shuffle=False,
                                       last_batch_handle='roll_over')
        self.best_args['y_pred'] = np.zeros((X.shape[0]))
        self.best_args['acci'] = []
        self.best_args['bestacci'] = []
        self.ploti = 0
        figprogress = plt.figure(figsize=(20, 15))
        print('Batch_size = %f' % self.best_args['batch_size'])
        print('update_interval = %f' % update_interval)
        self.best_args['plot_interval'] = int(5 * update_interval)
        print('plot_interval = %f' % self.best_args['plot_interval'])
        self.maxAcc = 0.0

        def refresh(i):  # i=3, a full epoch occurs every i=798/48
            if i % self.best_args['update_interval'] == 0:
                z = list(
                    model.extract_feature(self.feature, args, None, test_iter,
                                          N, self.xpu).values())[0]

                p = np.zeros((z.shape[0], self.best_args['num_centers']))
                self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
                # the soft assignments qi (pred)
                y_pred = p.argmax(axis=1)
                print(np.std(np.bincount(y_pred)), np.bincount(y_pred))

                # use a y that only considers the filledbyBC examples
                # compare soft assignments with known labels
                print('\n... Updating i = %f' % i)
                allL = np.asarray(roi_labels)
                data = z[allL != 'K', :]
                datalabels = allL[allL != 'K']
                RFmodel = RandomForestClassifier(n_jobs=2,
                                                 n_estimators=200,
                                                 random_state=0,
                                                 verbose=0)
                # Evaluate a score by cross-validation
                # integer=5, to specify the number of folds in a (Stratified)KFold,
                scores = cross_val_score(RFmodel, data, datalabels, cv=5)

                # do for overall class B and M
                labels = np.asarray(self.best_args['named_y'])
                Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=6)
                # This finds the indices of 5 closest neighbors
                nme_dist = [
                    'Diffuse', 'Focal', 'Linear', 'MultipleRegions',
                    'Regional', 'Segmental', 'N/A'
                ]
                nme_intenh = [
                    'Clumped', 'ClusteredRing', 'Heterogeneous', 'Homogeneous',
                    'Stippled or punctate', 'N/A'
                ]
                wnme_dist = np.zeros((len(nme_dist), len(nme_dist)),
                                     dtype=np.int64)
                wnme_intenh = np.zeros((len(nme_intenh), len(nme_intenh)),
                                       dtype=np.int64)
                NME_descriptorate = []
                kznme = 0
                for k in range(z.shape[0]):
                    iclass = labels[k]
                    dist, ind = Z_embedding_tree.query([z[k]], k=6)
                    dist5nn, ind5nn = dist[k != ind], ind[k != ind]
                    class5nn = labels[ind5nn]
                    if (len(class5nn) > 0
                            and (iclass.split('_')[1] != 'N/A'
                                 or iclass.split('_')[2] != 'N/A')):
                        # increment detections for final NME descriptor accuracy
                        kznme += 1
                        # tally 5-NN votes for each NME distribution descriptor
                        prednmed = [
                            sum(lab.split('_')[1] == d for lab in class5nn)
                            for d in nme_dist
                        ]
                        # tally 5-NN votes for each internal-enhancement descriptor
                        prednmeie = [
                            sum(lab.split('_')[2] == e for lab in class5nn)
                            for e in nme_intenh
                        ]
                        # prediction based on majority vote among the 5 nearest neighbors
                        pred_nme_dist = [
                            nme_dist[l] for l, pc in enumerate(prednmed)
                            if pc >= max(prednmed) and max(prednmed) > 0
                        ]
                        pred_nme_intenh = [
                            nme_intenh[l] for l, pc in enumerate(prednmeie)
                            if pc >= max(prednmeie) and max(prednmeie) > 0
                        ]

                        # allow a second kind of detection rate - a nme similar local neighborhood
                        if (iclass.split('_')[1] in pred_nme_dist
                                or iclass.split('_')[2] in pred_nme_intenh):
                            NME_descriptorate.append(1)

                        # for nme_dist
                        label_nme_dist = [
                            l for l, pc in enumerate(nme_dist)
                            if iclass.split('_')[1] == pc
                        ]
                        labelpred_nme_dist = [
                            l for l, pc in enumerate(prednmed)
                            if pc >= max(prednmed) and max(prednmed) > 0
                        ]
                        for u in label_nme_dist:
                            for v in labelpred_nme_dist:
                                wnme_dist[v, u] += 1

                        # for nme_intenh
                        label_nme_intenh = [
                            l for l, pc in enumerate(nme_intenh)
                            if iclass.split('_')[2] == pc
                        ]
                        labelpred_intenh = [
                            l for l, pc in enumerate(prednmeie)
                            if pc >= max(prednmeie) and max(prednmeie) > 0
                        ]
                        for u in label_nme_intenh:
                            for v in labelpred_intenh:
                                wnme_intenh[v, u] += 1

                # compute Z-space Accuracy
                Acc = scores.mean()
                print "cvRF Z-space Accuracy = %f " % Acc
                print scores.tolist()

                NME_Acc = sum(np.asarray(NME_descriptorate)) / float(kznme)
                print "NME decriptor agreenment (NMErate) = %f " % NME_Acc
                indwnme_dist = linear_assignment(wnme_dist.max() - wnme_dist)
                indwnme_intenh = linear_assignment(wnme_intenh.max() -
                                                   wnme_intenh)
                Acc5nn_nme_dist = sum(
                    [wnme_dist[v, u] for v, u in indwnme_dist]) / float(kznme)
                Acc5nn_nme_intenh = sum(
                    [wnme_intenh[v, u]
                     for v, u in indwnme_intenh]) / float(kznme)
                print "Acc5nn_nme_dist (DIST) = %f " % Acc5nn_nme_dist
                print "Acc5nn_nme_intenh (INT_ENH) = %f " % Acc5nn_nme_intenh

                if (i == 0):
                    tsne = TSNE(n_components=2,
                                perplexity=self.perplexity,
                                learning_rate=self.learning_rate,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(z)
                    self.best_args['initAcc'] = Acc
                    # plot initial z
                    figinint = plt.figure()
                    axinint = figinint.add_subplot(1, 1, 1)
                    plot_embedding_unsuper_NMEdist_intenh(
                        Z_tsne,
                        named_y,
                        axinint,
                        title=
                        'kmeans init tsne: Acc={}\n NME_Acc={}\n Acc5nn_nme_dist={}\n Acc5nn_nme_intenh={}'
                        .format(Acc, NME_Acc, Acc5nn_nme_dist,
                                Acc5nn_nme_intenh),
                        legend=True)
                    figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format(
                        save_to, self.best_args['znum'],
                        self.best_args['num_centers'], labeltype),
                                     bbox_inches='tight')
                    plt.close()

                # save best args
                self.best_args['acci'].append(Acc)
                if (Acc >= self.maxAcc):
                    print('Improving maxAcc = {}'.format(Acc))
                    for key, v in args.items():
                        self.best_args[key] = args[key]

                    self.maxAcc = Acc
                    self.best_args['zbestacci'] = z
                    self.best_args['bestacci'].append(Acc)
                    self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()
                    self.best_args['NME_Acc'] = NME_Acc
                    self.best_args['Acc5nn_nme_dist'] = Acc5nn_nme_dist
                    self.best_args['Acc5nn_nme_intenh'] = Acc5nn_nme_intenh

                if (i > 0 and i % self.best_args['plot_interval'] == 0
                        and self.ploti <= 15):
                    # Visualize the progression of the embedded representation in a subsample of data
                    # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
                    tsne = TSNE(n_components=2,
                                perplexity=self.perplexity,
                                learning_rate=self.learning_rate,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(z)
                    axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti)
                    plot_embedding_unsuper_NMEdist_intenh(
                        Z_tsne,
                        named_y,
                        axprogress,
                        title="Epoch %d z_tsne iter (%d)" % (self.ploti, i),
                        legend=False)
                    self.ploti = self.ploti + 1

                ## COMPUTING target distributions P
                ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster:
                weight = 1.0 / p.sum(axis=0)  # p.sum provides fj
                weight *= self.best_args['num_centers'] / weight.sum()
                p = (p**2) * weight
                train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
                print(np.sum(y_pred != self.best_args['y_pred']),
                      0.001 * y_pred.shape[0])

                # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations.
                # tol% = 0.001
                if i == self.best_args['update_interval'] * 100:  # stop after 100 update intervals
                    self.best_args['y_pred'] = y_pred
                    self.best_args['p'] = p
                    self.best_args['z'] = z
                    self.best_args['acci'].append(Acc)
                    return True

                self.best_args['y_pred'] = y_pred
                self.best_args['p'] = p
                self.best_args['z'] = z

        # start solver
        solver.set_iter_start_callback(refresh)
        solver.set_monitor(Monitor(50))
        solver.solve(self.xpu, self.loss, args, self.args_grad, None,
                     train_iter, 0, 1000000000, {}, False)
        self.end_args = args
        self.best_args['end_args'] = args

        # finish
        figprogress = plt.gcf()
        figprogress.savefig('{}\\tsne_progress_z{}_mu{}_{}.pdf'.format(
            save_to, self.best_args['znum'], self.best_args['num_centers'],
            labeltype),
                            bbox_inches='tight')
        plt.close()

        # plot final z
        figfinal = plt.figure()
        axfinal = figfinal.add_subplot(1, 1, 1)
        tsne = TSNE(n_components=2,
                    perplexity=self.perplexity,
                    learning_rate=self.learning_rate,
                    init='pca',
                    random_state=0,
                    verbose=2,
                    method='exact')
        Z_tsne = tsne.fit_transform(self.best_args['zbestacci'])
        plot_embedding_unsuper_NMEdist_intenh(
            Z_tsne,
            self.best_args['named_y'],
            axfinal,
            title=
            'final tsne: Acc={}\n NME_Acc={} \n Acc5nn_nme_dist={}\n Acc5nn_nme_intenh={}'
            .format(self.best_args['bestacci'][-1], self.best_args['NME_Acc'],
                    self.best_args['Acc5nn_nme_dist'],
                    self.best_args['Acc5nn_nme_intenh']),
            legend=True)
        figfinal.savefig('{}\\tsne_final_z{}_mu{}_{}.pdf'.format(
            save_to, self.best_args['znum'], self.best_args['num_centers'],
            labeltype),
                         bbox_inches='tight')
        plt.close()

        outdict = {
            'acc': self.best_args['acci'],
            'bestacc': self.best_args['bestacci'],
            'NME_Acc': self.best_args['NME_Acc'],
            'Acc5nn_nme_dist': self.best_args['Acc5nn_nme_dist'],
            'Acc5nn_nme_intenh': self.best_args['Acc5nn_nme_intenh'],
            'p': self.best_args['p'],
            'z': self.best_args['z'],
            'y_pred': self.best_args['y_pred'],
            'named_y': self.best_args['named_y'],
            'num_centers': self.best_args['num_centers']
        }

        return outdict
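cluster_acc (called in Example #3) and the wnme_dist / wnme_intenh bookkeeping above score unsupervised assignments the same way: accumulate a confusion matrix w and solve linear_assignment(w.max() - w), i.e. use the Hungarian algorithm to find the one-to-one label mapping that maximizes agreement. A self-contained sketch of that trick, substituting scipy's linear_sum_assignment for the older sklearn linear_assignment (the helper name is mine):

import numpy as np
from scipy.optimize import linear_sum_assignment

def best_map_accuracy(y_pred, y_true):
    # Confusion matrix between predicted cluster ids and true class ids.
    D = int(max(y_pred.max(), y_true.max())) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for p, t in zip(y_pred, y_true):
        w[p, t] += 1
    # Hungarian algorithm on (max - w) turns maximization into minimization.
    rows, cols = linear_sum_assignment(w.max() - w)
    return w[rows, cols].sum() / float(y_pred.size)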
Example #9
    def cluster(self, X_train, y_dec_train, y_train, classes, batch_size, save_to, labeltype, update_interval, logger):
        N = X_train.shape[0]
        self.best_args['update_interval'] = update_interval
        self.best_args['y_dec'] = y_dec_train 
        self.best_args['roi_labels'] = y_train
        self.best_args['classes'] = classes
        self.best_args['batch_size'] = batch_size
        self.logger = logger
        
        # selecting batch size
        # [42*t for t in range(42)]  will produce 16 train epochs
        # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630]
        test_iter = mx.io.NDArrayIter({'data': X_train}, 
                                      batch_size=N, shuffle=False,
                                      last_batch_handle='pad')
        args = {k: mx.nd.array(v.asnumpy(), ctx=self.xpu) for k, v in self.args.items()}
        ## embedded point zi 
        self.z = list(model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values())[0]
                
        # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
        self.perplexity = 5
        self.learning_rate = 125
        # reconstruct wordy labels list(Y)==named_y
        named_y = [classes[kc] for kc in y_dec_train]
        self.best_args['named_y'] = named_y
        
        # To initialize the cluster centers, we pass the data through
        # the initialized DNN to get embedded data points and then
        # perform standard k-means clustering in the feature space Z
        # to obtain k initial centroids {mu j}
        kmeans = KMeans(self.best_args['num_centers'], n_init=20)
        kmeans.fit(self.z)
        args['dec_mu'][:] = kmeans.cluster_centers_
        
        figprogress = plt.figure(figsize=(20, 15))  
        print('Batch_size = %f' % self.best_args['batch_size'])
        print('update_interval = %f' % update_interval)
        self.best_args['plot_interval'] = int(8*update_interval)
        print('plot_interval = %f' % self.best_args['plot_interval'])
        self.best_args['y_pred'] = np.zeros((X_train.shape[0]))
        self.best_args['meanAuc_cv'] = [] 
        self.best_args['std_auc'] = [] 
        self.best_args['auc_val'] = []
        self.best_args['overall_metric'] = []
        self.ploti = 0
        self.maxAUC = 10000.0  # best (lowest) overall metric seen so far, despite the name
        
        ### Define DEC training variables
        label_buff = np.zeros((X_train.shape[0], self.best_args['num_centers']))
        train_iter = mx.io.NDArrayIter({'data': X_train}, 
                                       {'label': label_buff}, 
                                       batch_size=self.best_args['batch_size'],
                                       shuffle=True, last_batch_handle='roll_over')
        ### KL DIVERGENCE MINIMIZATION. eq(2)
        # our model is trained by matching the soft assignment to the target distribution. 
        # To this end, we define our objective as a KL divergence loss between 
        # the soft assignments qi (pred) and the auxiliary distribution pi (label)
        # original: 0.01
        # try 1: Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.000125,
        #               lr_scheduler=mx.misc.FactorScheduler(20*update_interval, 0.5))
        # try 2: Solver('sgd', momentum=0.6, wd=0.05, learning_rate=0.00125,
        #               lr_scheduler=mx.misc.FactorScheduler(20*update_interval, 0.5))
        solver = Solver('sgd', learning_rate=0.1, lr_scheduler=mx.misc.FactorScheduler(100, 0.1))
        #solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)
        def ce(label, pred):
            DECmetric = np.sum(label*np.log(label/(pred+0.000001)))/label.shape[0]
            print("DECmetric = {}".format(DECmetric))
            
            #####################
            # Z-space MLP fully connected layer for classification
            #####################
            batch_size = 50
            # Run classifier with cross-validation and plot ROC curves
            cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=3)
            # Evaluate a score by cross-validation
            tprs = []; aucs = []
            mean_fpr = np.linspace(0, 1, 100)
            cvi = 0
            for train, test in cv.split(self.Z_train, self.yZ_train):
                # Multilayer Perceptron
                MLP_train_iter = mx.io.NDArrayIter(self.Z_train[train], self.yZ_train[train], batch_size, shuffle=True)
                MLP_val_iter = mx.io.NDArrayIter(self.Z_train[test], self.yZ_train[test], batch_size)    
                
                # We’ll define the MLP using MXNet’s symbolic interface
                dataMLP = mx.sym.Variable('data')
                
                #The following code declares two fully connected layers with 128 and 32 neurons each.
                #Furthermore, these FC layers are sandwiched between ReLU activation layers each
                #one responsible for performing an element-wise ReLU transformation on the FC layer output.
                # The first fully-connected layer and the corresponding activation function
                fc1  = mx.sym.FullyConnected(data=dataMLP, num_hidden = 128)
                act1 = mx.sym.Activation(data=fc1, act_type="relu")
                
                fc2  = mx.sym.FullyConnected(data=act1, num_hidden = 32)
                act2 = mx.sym.Activation(data=fc2, act_type="relu")
                
                # data has 2 classes
                fc3  = mx.sym.FullyConnected(data=act2, num_hidden=2)
                # Softmax with cross entropy loss
                mlp  = mx.sym.SoftmaxOutput(data=fc3, name='softmax')
                
                # create a trainable module on CPU                   
                #mon = mx.mon.Monitor(interval=100, pattern='.*', sort=True); # Defaults to mean absolute value |x|/size(x)
                #checkpoint = mx.callback.do_checkpoint('mlp_model_params_z{}_mu{}.arg'.format(self.best_args['znum'],self.best_args['num_centers']))
                self.mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())
                self.mlp_model.fit(MLP_train_iter,  # train data
                              monitor=None,
                              optimizer='sgd',  # use SGD to train
                              optimizer_params={'learning_rate':0.1},  # use fixed learning rate
                              eval_metric='acc',  # report accuracy during training (alt: MLPacc(yZ_val, Z_val))
                              num_epoch=100)
                              # extras: epoch_end_callback=checkpoint, monitor=mon

                #After the above training completes, we can evaluate the trained model by running predictions on validation data. 
                #The following source code computes the prediction probability scores for each validation data. 
                # prob[i][j] is the probability that the i-th validation contains the j-th output class.
                prob_val = self.mlp_model.predict(MLP_val_iter)
                # Compute ROC curve and area the curve
                fpr, tpr, thresholds = roc_curve(self.yZ_train[test], prob_val.asnumpy()[:,1])
                # to create an ROC with 100 pts
                tprs.append(interp(mean_fpr, fpr, tpr))
                tprs[-1][0] = 0.0
                roc_auc = auc(fpr, tpr)
                print(roc_auc)
                aucs.append(roc_auc)
                cvi += 1
                
            # compute across all cvs
            mean_tpr = np.mean(tprs, axis=0)
            mean_tpr[-1] = 1.0
            mean_auc = auc(mean_fpr, mean_tpr)
            std_auc = np.std(aucs)
            print(r'cv meanROC (AUC = {0:.4f} $\pm$ {1:.4f})'.format(mean_auc, std_auc))

            Z_test_iter = mx.io.NDArrayIter(self.Z_test,  None, batch_size)
            prob_test = self.mlp_model.predict(Z_test_iter)
            # Compute ROC curve and area the curve
            fpr_val, tpr_val, thresholds_val = roc_curve(self.yZ_test, prob_test.asnumpy()[:,1])
            self.auc_val = auc(fpr_val, tpr_val)
            print(r'cv test (AUC = {0:.4f})'.format(self.auc_val))
                                
            # compute Z-space metric
            overall_metric = -np.log(mean_auc) -np.log(1-DECmetric) #np.log(1-mean_auc) + np.log(DECmetric)
            print("overall_metric: DEC+MLP = {}".format(overall_metric))
            self.best_args['overall_metric'].append(overall_metric)
            
            if overall_metric <= self.maxAUC:
                print('================== Improving auc_val = {}'.format(self.auc_val))
                for key, v in args.items():
                    self.best_args[key] = args[key]
                    
                self.best_args['meanAuc_cv'].append(mean_auc)
                self.best_args['std_auc'].append(std_auc)                    
                self.best_args['auc_val'].append(self.auc_val)
                self.best_args['pbestacci'] = self.p
                self.best_args['zbestacci']  = self.z 
                self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()
                #self.best_args['mlp_model'] = self.mlp_model
                self.mlp_model.save_params(os.path.join(save_to,'mlp_model_params_z{}_mu{}.arg'.format(self.best_args['znum'],self.best_args['num_centers'])))
                self.maxAUC = overall_metric
                
            return overall_metric
            
        def refresh(i): # i=3, a full epoch occurs every i=798/48
            if i%self.best_args['update_interval'] == 0:
                self.z = list(model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values())[0]
                self.p = np.zeros((self.z.shape[0], self.best_args['num_centers']))
                self.dec_op.forward([self.z, args['dec_mu'].asnumpy()], [self.p])
                self.best_args['dec_mu'] = args['dec_mu']
                
                # the soft assignments qi (pred)
                y_pred = self.p.argmax(axis=1)
                print(np.std(np.bincount(y_pred)), np.bincount(y_pred))
                 
                ## COMPUTING target distributions P
                ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster:
                print('\n... Updating i = %f' % i)
                weight = 1.0/self.p.sum(axis=0) # p.sum provides fj
                weight *= self.best_args['num_centers']/weight.sum()
                self.p = (self.p**2)*weight
                train_iter.data_list[1][:] = (self.p.T/self.p.sum(axis=1)).T
                #print np.sum(y_pred != self.best_args['y_pred']), 0.001*y_pred.shape[0]
                
                #####################
                # prep Z-space MLP fully connected layer for classification
                #####################
                # compare soft assignments with known labels (only B or M)
                print('\n... Updating MLP fully connected layer i = %f' % i)
                sep = int(self.z.shape[0]*0.10)
                print(self.z.shape)
                datalabels = np.asarray(self.best_args['roi_labels'])
                dataZspace = np.concatenate((self.z, self.p), axis=1) #zbestacci #dec_model['zbestacci']   
                Z = dataZspace[datalabels!='K',:]
                y = datalabels[datalabels!='K']
                print(Z)
                                
                # Do a 5 fold cross-validation
                self.Z_test = Z[:sep]
                self.yZ_test = np.asanyarray(y[:sep]=='M').astype(int) 
                self.Z_train = Z[sep:]
                self.yZ_train = np.asanyarray(y[sep:]=='M').astype(int) 
                print(self.Z_test.shape)
                print(self.Z_train.shape)
                
                if(i==0):
                    self.tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate,
                                init='pca', random_state=0, verbose=2, method='exact')
                    self.Z_tsne = self.tsne.fit_transform(dataZspace)  
                    
                    # plot initial z        
                    figinint = plt.figure()
                    axinint = figinint.add_subplot(1,1,1)
                    plot_embedding_unsuper_NMEdist_intenh(self.Z_tsne, named_y, axinint, title='kmeans init tsne:\n', legend=True)
                    figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format(save_to,self.best_args['znum'],self.best_args['num_centers'],labeltype), bbox_inches='tight')     
                    plt.close() 
                
                if(i>0 and i%self.best_args['plot_interval']==0 and self.ploti<=15): 
                    # Visualize the progression of the embedded representation in a subsample of data
                    # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
                    tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate,
                         init='pca', random_state=0, verbose=2, method='exact')
                    Z_tsne = tsne.fit_transform(dataZspace)
                    axprogress = figprogress.add_subplot(4,4,1+self.ploti)
                    plot_embedding_unsuper_NMEdist_intenh(Z_tsne, named_y, axprogress, title="iter %d z_tsne" % (i), legend=False)
                    self.ploti = self.ploti+1
                                    
                # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations.
                # tol% = 0.001
                if i == self.best_args['update_interval']*120:  # stop after 120 update intervals
                    return True 
        
        # Deep learning metric to minimize
        solver.set_metric(mx.metric.CustomMetric(ce))

        # start solver
        solver.set_iter_start_callback(refresh)
        solver.set_monitor(Monitor(self.best_args['update_interval']))
        solver.solve(self.xpu, self.loss, args, self.args_grad, None,
                     train_iter, 0, 1000000000, {}, False)
        self.end_args = args
        self.best_args['end_args'] = args
        
        # finish                
        figprogress = plt.gcf()
        figprogress.savefig('{}\\tsne_progress_z{}_mu{}_{}.pdf'.format(save_to,self.best_args['znum'],self.best_args['num_centers'],labeltype), bbox_inches='tight')    
        plt.close()    
        
        # plot final z
        figfinal = plt.figure()
        axfinal = figfinal.add_subplot(1,1,1)
        tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate,
             init='pca', random_state=0, verbose=2, method='exact')
        Z_tsne = tsne.fit_transform(self.z)      
        plot_embedding_unsuper_NMEdist_intenh(Z_tsne, self.best_args['named_y'], axfinal, title='final tsne', legend=True)
        figfinal.savefig('{}\\tsne_final_z{}_mu{}_{}.pdf'.format(save_to,self.best_args['znum'],self.best_args['num_centers'],labeltype), bbox_inches='tight')    
        plt.close()          

        outdict = {'meanAuc_cv':self.best_args['meanAuc_cv'], 
                    'std_auc':self.best_args['std_auc'], 
                    'auc_val':self.best_args['auc_val'],
                    'overall_metric':self.best_args['overall_metric'],
                    'dec_mu':self.best_args['dec_mu'],
                    'y_pred': self.best_args['y_pred'],
                    'named_y': self.best_args['named_y'],
                    'classes':self.best_args['classes'],
                    'num_centers': self.best_args['num_centers'],
                    'znum':self.best_args['znum'],
                    'update_interval':self.best_args['update_interval'],
                    'batch_size':self.best_args['batch_size']}                               
        return outdict
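The DEC term inside ce() above, and the ce metrics of Examples #3, #8, and #10, are all the same quantity: a batch-averaged KL divergence between the auxiliary target distribution (label) and the soft assignments (pred), with a small additive guard against division by zero. Extracted as a standalone helper (the name and the eps default are mine; 1e-6 matches the 0.000001 used above):

import numpy as np

def dec_kl_metric(label, pred, eps=1e-6):
    # KL(label || pred) averaged over the batch; eps guards pred == 0.
    return np.sum(label * np.log(label / (pred + eps))) / label.shape[0]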
Example #10
    def cluster(self,
                X_train,
                y_dec_train,
                y_train,
                classes,
                batch_size,
                save_to,
                labeltype,
                update_interval=None):
        N = X_train.shape[0]
        self.best_args['update_interval'] = update_interval
        self.best_args['y_dec'] = y_dec_train
        self.best_args['roi_labels'] = y_train
        self.best_args['classes'] = classes
        self.best_args['batch_size'] = batch_size

        # selecting batch size
        # [42*t for t in range(42)]  will produce 16 train epochs
        # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630]
        test_iter = mx.io.NDArrayIter({'data': X_train},
                                      batch_size=N,
                                      shuffle=False,
                                      last_batch_handle='pad')
        args = {
            k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()
        }
        ## embedded point zi
        z = list(model.extract_feature(self.feature, args, None, test_iter, N,
                                       self.xpu).values())[0]

        # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
        self.perplexity = 15
        self.learning_rate = 125
        # reconstruct wordy labels list(Y)==named_y
        named_y = [classes[kc] for kc in y_dec_train]
        self.best_args['named_y'] = named_y

        # To initialize the cluster centers, we pass the data through
        # the initialized DNN to get embedded data points and then
        # perform standard k-means clustering in the feature space Z
        # to obtain k initial centroids {mu j}
        kmeans = KMeans(self.best_args['num_centers'], n_init=20)
        kmeans.fit(z)
        args['dec_mu'][:] = kmeans.cluster_centers_

        ### KL DIVERGENCE MINIMIZATION. eq(2)
        # our model is trained by matching the soft assignment to the target distribution.
        # To this end, we define our objective as a KL divergence loss between
        # the soft assignments qi (pred) and the auxiliary distribution pi (label)
        solver = Solver(
            'sgd',
            momentum=0.9,
            wd=0.0,
            learning_rate=0.1,
            lr_scheduler=mx.misc.FactorScheduler(20 * update_interval, 0.5)
        )  # alternatives tried: FactorScheduler(20*update_interval, 0.4); learning_rate=0.01

        def ce(label, pred):
            return np.sum(label * np.log(label /
                                         (pred + 0.000001))) / label.shape[0]

        solver.set_metric(mx.metric.CustomMetric(ce))

        label_buff = np.zeros(
            (X_train.shape[0], self.best_args['num_centers']))
        train_iter = mx.io.NDArrayIter({'data': X_train},
                                       {'label': label_buff},
                                       batch_size=self.best_args['batch_size'],
                                       shuffle=False,
                                       last_batch_handle='roll_over')
        self.best_args['y_pred'] = np.zeros((X_train.shape[0]))
        self.best_args['acci'] = []
        self.best_args['bestacci'] = []
        self.ploti = 0
        figprogress = plt.figure(figsize=(20, 15))
        print('Batch_size = %f' % self.best_args['batch_size'])
        print('update_interval = %f' % update_interval)
        self.best_args['plot_interval'] = int(20 * update_interval)
        print('plot_interval = %f' % self.best_args['plot_interval'])
        self.maxAcc = 0.0

        def refresh(i):  # i=3, a full epoch occurs every i=798/48
            if i % self.best_args['update_interval'] == 0:
                z = list(
                    model.extract_feature(self.feature, args, None, test_iter,
                                          N, self.xpu).values())[0]

                p = np.zeros((z.shape[0], self.best_args['num_centers']))
                self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
                # the soft assignments qi (pred)
                y_pred = p.argmax(axis=1)
                #print np.std(np.bincount(y_dec_train)), np.bincount(y_dec_train)
                print(np.std(np.bincount(y_pred)), np.bincount(y_pred))

                #####################
                # Z-space CV RF classifier METRICS
                #####################
                # compare soft assignments with known labels (only B or M)
                print('\n... Updating i = %f' % i)
                allL = np.asarray(y_train)
                dataZspace = np.concatenate(
                    (z[allL != 'K', :], np.reshape(y_pred[allL != 'K'],
                                                   (-1, 1))),
                    axis=1)
                ydatalabels = np.asarray(allL[allL != 'K'] == 'M').astype(
                    int)  # malignant is positive class

                cv = StratifiedKFold(n_splits=5)
                RFmodel = RandomForestClassifier(n_jobs=2,
                                                 n_estimators=500,
                                                 random_state=0,
                                                 verbose=0)
                # Evaluate a score by cross-validation
                tprs = []
                aucs = []
                mean_fpr = np.linspace(0, 1, 100)
                cvi = 0
                for train, test in cv.split(dataZspace, ydatalabels):
                    probas = RFmodel.fit(dataZspace[train],
                                         ydatalabels[train]).predict_proba(
                                             dataZspace[test])
                    # Compute ROC curve and area the curve
                    fpr, tpr, thresholds = roc_curve(ydatalabels[test],
                                                     probas[:, 1])
                    # to create an ROC with 100 pts
                    tprs.append(interp(mean_fpr, fpr, tpr))
                    tprs[-1][0] = 0.0
                    roc_auc = auc(fpr, tpr)
                    aucs.append(roc_auc)
                    cvi += 1

                mean_tpr = np.mean(tprs, axis=0)
                mean_tpr[-1] = 1.0
                mean_auc = auc(mean_fpr, mean_tpr)

                # integer=5, to specify the number of folds in a (Stratified)KFold,
                #scores_BorM = cross_val_score(RFmodel, data, datalabels, cv=5)
                # compute Z-space Accuracy
                #Acc = scores_BorM.mean()
                Acc = mean_auc
                print "cvRF BorM mean_auc = %f " % Acc
                #print scores_BorM.tolist()

                if (i == 0):
                    tsne = TSNE(n_components=2,
                                perplexity=self.perplexity,
                                learning_rate=self.learning_rate,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(z)
                    self.best_args['initAcc'] = Acc
                    # plot initial z
                    figinint = plt.figure()
                    axinint = figinint.add_subplot(1, 1, 1)
                    plot_embedding_unsuper_NMEdist_intenh(
                        Z_tsne,
                        named_y,
                        axinint,
                        title='kmeans init tsne: Acc={}'.format(Acc),
                        legend=True)
                    figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format(
                        save_to, self.best_args['znum'],
                        self.best_args['num_centers'], labeltype),
                                     bbox_inches='tight')
                    plt.close()

                # save best args
                self.best_args['acci'].append(Acc)
                if (Acc >= self.maxAcc):
                    print('Improving mean_auc = {}'.format(Acc))
                    for key, v in args.items():
                        self.best_args[key] = args[key]

                    self.maxAcc = Acc
                    self.best_args['pbestacci'] = p
                    self.best_args['zbestacci'] = z
                    self.best_args['bestacci'].append(Acc)
                    self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()

                if (i > 0 and i % self.best_args['plot_interval'] == 0
                        and self.ploti <= 15):
                    # Visualize the progression of the embedded representation in a subsample of data
                    # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
                    tsne = TSNE(n_components=2,
                                perplexity=self.perplexity,
                                learning_rate=self.learning_rate,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(z)
                    axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti)
                    plot_embedding_unsuper_NMEdist_intenh(
                        Z_tsne,
                        named_y,
                        axprogress,
                        title="Epoch %d z_tsne Acc (%f)" % (i, Acc),
                        legend=False)
                    self.ploti = self.ploti + 1

                ## COMPUTING target distributions P
                ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster:
                weight = 1.0 / p.sum(axis=0)  # p.sum provides fj
                weight *= self.best_args['num_centers'] / weight.sum()
                p = (p**2) * weight
                train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
                print(np.sum(y_pred != self.best_args['y_pred']),
                      0.001 * y_pred.shape[0])

                # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations.
                # tol% = 0.001
                if i == self.best_args['update_interval'] * 200:  # stop after 200 update intervals
                    self.best_args['y_pred'] = y_pred
                    self.best_args['acci'].append(Acc)
                    return True

                self.best_args['y_pred'] = y_pred

        # start solver
        solver.set_iter_start_callback(refresh)
        solver.set_monitor(Monitor(20))
        solver.solve(self.xpu, self.loss, args, self.args_grad, None,
                     train_iter, 0, 1000000000, {}, False)
        self.end_args = args
        self.best_args['end_args'] = args

        # finish
        figprogress = plt.gcf()
        figprogress.savefig('{}\\tsne_progress_z{}_mu{}_{}.pdf'.format(
            save_to, self.best_args['znum'], self.best_args['num_centers'],
            labeltype),
                            bbox_inches='tight')
        plt.close()

        # plot final z
        figfinal = plt.figure()
        axfinal = figfinal.add_subplot(1, 1, 1)
        tsne = TSNE(n_components=2,
                    perplexity=self.perplexity,
                    learning_rate=self.learning_rate,
                    init='pca',
                    random_state=0,
                    verbose=2,
                    method='exact')
        Z_tsne = tsne.fit_transform(self.best_args['zbestacci'])
        plot_embedding_unsuper_NMEdist_intenh(
            Z_tsne,
            self.best_args['named_y'],
            axfinal,
            title='final tsne: Acc={}'.format(self.best_args['bestacci'][-1]),
            legend=True)
        figfinal.savefig(os.path.join(
            save_to, 'tsne_final_z{}_mu{}_{}.pdf'.format(
                self.best_args['znum'], self.best_args['num_centers'],
                labeltype)),
            bbox_inches='tight')
        plt.close()

        outdict = {
            'initAcc': self.best_args['initAcc'],
            'acci': self.best_args['acci'],
            'bestacci': self.best_args['bestacci'],
            'pbestacci': self.best_args['pbestacci'],
            'zbestacci': self.best_args['zbestacci'],
            'dec_mubestacci': self.best_args['dec_mu'],
            'y_pred': self.best_args['y_pred'],
            'named_y': self.best_args['named_y'],
            'classes': self.best_args['classes'],
            'num_centers': self.best_args['num_centers'],
            'znum': self.best_args['znum'],
            'update_interval': self.best_args['update_interval'],
            'batch_size': self.best_args['batch_size']
        }

        return outdict
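
The target-distribution update inside each refresh callback above is the core of DEC training: the soft assignments q are sharpened by squaring and normalizing by per-cluster frequency, and the result becomes the label the KL loss is trained against. A minimal standalone NumPy sketch of that step (the names target_distribution and q are illustrative, not from the code; the extra num_centers rescaling in the code cancels after row normalization):

import numpy as np

def target_distribution(q):
    # q: (n_samples, n_centers) soft assignments, rows summing to 1
    # p_ij = (q_ij**2 / f_j) / sum_j'(q_ij'**2 / f_j'), with f_j = sum_i q_ij
    weight = q ** 2 / q.sum(axis=0)
    return (weight.T / weight.sum(axis=1)).T

q = np.array([[0.9, 0.1], [0.6, 0.4], [0.2, 0.8]])
p = target_distribution(q)
print(p.sum(axis=1))  # rows of p again sum to 1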
Example #11
0
    def cluster_varying_mu(self,
                           X,
                           y_dec,
                           roi_labels,
                           classes,
                           save_to,
                           labeltype,
                           update_interval=None):
        N = X.shape[0]
        self.best_args['update_interval'] = update_interval
        self.best_args['y_dec'] = y_dec
        self.best_args['roi_labels'] = roi_labels
        self.best_args['classes'] = classes

        # selecting batch size (e.g. 205 = 615/3, or 256); with N examples,
        # one training epoch corresponds to N / batch_size solver iterations
        batch_size = self.best_args['batch_size']
        test_iter = mx.io.NDArrayIter({'data': X},
                                      batch_size=batch_size,
                                      shuffle=False,
                                      last_batch_handle='pad')
        args = {
            k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()
        }
        ## embedded point zi
        z = list(
            model.extract_feature(self.feature, args, None, test_iter, N,
                                  self.xpu).values())[0]

        # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
        self.perplexity = 15
        self.learning_rate = 200
        tsne = TSNE(n_components=2,
                    perplexity=self.perplexity,
                    learning_rate=self.learning_rate,
                    init='pca',
                    random_state=0,
                    verbose=2,
                    method='exact')
        Z_tsne = tsne.fit_transform(z)

        # plot initial z
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        # reconstruct the wordy label names (named_y) from the class indices
        named_y = [classes[kc] for kc in y_dec]
        self.best_args['named_y'] = named_y
        plot_embedding_unsuper_NMEdist_intenh(
            Z_tsne,
            named_y,
            ax,
            title='{} tsne with perplexity {}'.format(labeltype,
                                                      self.perplexity),
            legend=True)
        fig.savefig(save_to + os.sep + 'iter1_tsne_init_z' +
                    str(self.best_args['znum']) + '_varying_mu' +
                    str(self.best_args['num_centers']) + '.pdf',
                    bbox_inches='tight')
        plt.close()

        # To initialize the cluster centers, we pass the data through
        # the initialized DNN to get embedded data points and then
        # perform standard k-means clustering in the feature space Z
        # to obtain k initial centroids {mu j}
        kmeans = KMeans(self.best_args['num_centers'], n_init=20)
        kmeans.fit(z)
        args['dec_mu'][:] = kmeans.cluster_centers_

        ### KL DIVERGENCE MINIMIZATION. eq(2)
        # our model is trained by matching the soft assignment to the target distribution.
        # To this end, we define our objective as a KL divergence loss between
        # the soft assignments qi (pred) and the auxiliary distribution pi (label)
        solver = Solver('sgd',
                        momentum=0.9,
                        wd=0.0,
                        learning_rate=0.01,
                        lr_scheduler=mx.misc.FactorScheduler(
                            100 * update_interval, 0.5))

        def ce(label, pred):
            return np.sum(label * np.log(label /
                                         (pred + 0.000001))) / label.shape[0]

        solver.set_metric(mx.metric.CustomMetric(ce))

        label_buff = np.zeros((X.shape[0], self.best_args['num_centers']))
        train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff},
                                       batch_size=N,
                                       shuffle=False,
                                       last_batch_handle='roll_over')
        self.best_args['y_pred'] = np.zeros((X.shape[0]))
        self.best_args['acci'] = []
        self.best_args['bestacci'] = []
        self.ploti = 0
        fig = plt.figure(figsize=(20, 15))
        print('Batch_size = %d' % batch_size)
        print('update_interval = %d' % update_interval)
        self.best_args['plot_interval'] = int(25 * update_interval)
        print('plot_interval = %d' % self.best_args['plot_interval'])
        self.maxAcc = 0.0

        def refresh(i):  # callback invoked at the start of every solver iteration
            if i % self.best_args['update_interval'] == 0:
                z = list(
                    model.extract_feature(self.feature, args, None, test_iter,
                                          N, self.xpu).values())[0]

                p = np.zeros((z.shape[0], self.best_args['num_centers']))
                self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
                # the soft assignments qi (pred)
                y_pred = p.argmax(axis=1)
                print(np.std(np.bincount(y_pred)), np.bincount(y_pred))

                # use a y that only considers the filledbyBC examples
                # compare soft assignments with known labels
                print('\n... Updating i = %d' % i)
                # do for overall class B and M
                Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=5)
                # This finds the indices of 5 closest neighbors
                labels = np.asarray(self.best_args['roi_labels'])
                Neg = sum(labels == 'B')  # benign
                Pos = sum(labels == 'M')  # malignant
                TP = []
                TN = []
                for k in range(z.shape[0]):
                    iclass = labels[k]
                    dist, ind = Z_embedding_tree.query([z[k]], k=6)
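                    # the k=6 query returns the point itself plus its 5 nearest neighbors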
                    dist5nn, ind5nn = dist[k != ind], ind[k != ind]
                    class5nn = labels[ind5nn]
                    # exclude the 'K' (unknown) class
                    class5nn = class5nn[class5nn != 'K']
                    if (len(class5nn) > 0):
                        predc = []
                        for c in np.unique(class5nn):
                            predc.append(sum(class5nn == c))
                        # prediction based on majority vote
                        predclass = np.unique(class5nn)[predc == max(predc)]

                        if (len(predclass) == 1):
                            # compute TP if M
                            if (iclass == 'M'):
                                TP.append(predclass[0] == iclass)
                            # compute TN if B
                            if (iclass == 'B'):
                                TN.append(predclass[0] == iclass)

                        if (len(predclass) == 2):
                            # compute TP if M
                            if (iclass == 'M'):
                                TP.append(predclass[1] == iclass)
                            # compute TN if B
                            if (iclass == 'B'):
                                TN.append(predclass[0] == iclass)

                # compute TPR and TNR
                TPR = sum(TP) / float(Pos)
                TNR = sum(TN) / float(Neg)
                Acc = sum(TP + TN) / float(Pos + Neg)
                print("True Positive Rate (TPR) = %f " % TPR)
                print("True Negative Rate (TNR) = %f " % TNR)
                print("Accuracy (Acc) = %f " % Acc)

                # save best args
                self.best_args['acci'].append(Acc)
                if (Acc >= self.maxAcc):
                    print('Improving maxAcc = {}'.format(Acc))
                    for key, v in args.items():
                        self.best_args[key] = args[key]

                    self.maxAcc = Acc
                    self.best_args['bestacci'].append(Acc)
                    self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()

                if (i % self.best_args['plot_interval'] == 0
                        and self.ploti <= 15):
                    # Visualize the progression of the embedded representation in a subsample of data.
                    # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
                    tsne = TSNE(n_components=2,
                                perplexity=self.perplexity,
                                learning_rate=self.learning_rate,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(z)

                    ax = fig.add_subplot(4, 4, 1 + self.ploti)
                    plot_embedding_unsuper_NMEdist_intenh(
                        Z_tsne,
                        named_y,
                        ax,
                        title="Epoch %d z_tsne iter (%d)" % (self.ploti, i),
                        legend=False)
                    self.ploti = self.ploti + 1

                ## COMPUTING target distributions P
                ## pi is computed by raising qi to the second power and then
                ## normalizing by frequency per cluster (p currently holds q):
                weight = 1.0 / p.sum(axis=0)  # p.sum(axis=0) gives the cluster frequencies fj
                weight *= self.best_args['num_centers'] / weight.sum()
                p = (p**2) * weight
                # write the new targets into the iterator's label buffer
                train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
                # changed assignments vs. the 0.1% tolerance threshold
                print(np.sum(y_pred != self.best_args['y_pred']),
                      0.001 * y_pred.shape[0])

                # The DEC paper stops when fewer than tol = 0.1% of points change
                # cluster assignment between two consecutive intervals; here a
                # fixed iteration cap is used instead.
                if i == self.best_args['update_interval'] * 600:
                    self.best_args['y_pred'] = y_pred
                    self.best_args['p'] = p
                    self.best_args['z'] = z
                    self.best_args['acci'].append(Acc)
                    return True

                self.best_args['y_pred'] = y_pred
                self.best_args['p'] = p
                self.best_args['z'] = z

        # start solver
        solver.set_iter_start_callback(refresh)
        solver.set_monitor(Monitor(100))
        solver.solve(self.xpu, self.loss, args, self.args_grad, None,
                     train_iter, 0, 1000000000, {}, False)
        self.end_args = args
        self.best_args['end_args'] = args

        # finish
        fig = plt.gcf()
        fig.savefig(save_to + os.sep + 'iter1_tsne_progress_z' +
                    str(self.best_args['znum']) + '_varying_mu' +
                    str(self.best_args['num_centers']) + '.pdf',
                    bbox_inches='tight')
        plt.close()

        # plot final z
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        tsne = TSNE(n_components=2,
                    perplexity=self.perplexity,
                    learning_rate=self.learning_rate,
                    init='pca',
                    random_state=0,
                    verbose=2,
                    method='exact')
        Z_tsne = tsne.fit_transform(self.best_args['z'])
        plot_embedding_unsuper_NMEdist_intenh(Z_tsne,
                                              named_y,
                                              ax,
                                              title="tsne with perplexity %d" %
                                              self.perplexity,
                                              legend=True)
        fig.savefig(save_to + os.sep + 'tsne_final_z' +
                    str(self.best_args['znum']) + '_varying_mu' +
                    str(self.best_args['num_centers']) + '.pdf',
                    bbox_inches='tight')
        plt.close()

        outdict = {
            'acc': self.best_args['acci'],
            'bestacc': self.best_args['bestacci'],
            'p': self.best_args['p'],
            'z': self.best_args['z'],
            'y_pred': self.best_args['y_pred'],
            'named_y': self.best_args['named_y'],
            'num_centers': self.best_args['num_centers']
        }

        return outdict
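
The custom dec_op used in these methods fills p with the soft assignments q. In the DEC formulation this is a Student's t-distribution kernel between embedded points and cluster centers; the sketch below is a plausible NumPy reading of what dec_op.forward computes, not the operator's actual implementation:

import numpy as np

def soft_assignments(z, mu, alpha=1.0):
    # z: (n, d) embedded points; mu: (k, d) cluster centers
    # q_ij is proportional to (1 + ||z_i - mu_j||**2 / alpha) ** (-(alpha + 1) / 2)
    dist2 = ((z[:, None, :] - mu[None, :, :]) ** 2).sum(axis=2)
    q = (1.0 + dist2 / alpha) ** (-(alpha + 1.0) / 2.0)
    return q / q.sum(axis=1, keepdims=True)  # normalize each row

# cluster predictions then follow as q.argmax(axis=1), as in the refresh callbacks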
Example #12
0
    def cluster_unsuperv(self, X, y, y_tsne, fighome, update_interval=None):
        N = X.shape[0]
        plotting_interval = N
        if not update_interval:
            update_interval = int(self.batch_size / 5.0)

        # selecting batch size (e.g. 205 = 615/3, or 256); with N examples,
        # one training epoch corresponds to N / batch_size solver iterations
        batch_size = self.batch_size
        test_iter = mx.io.NDArrayIter({'data': X},
                                      batch_size=batch_size,
                                      shuffle=False,
                                      last_batch_handle='pad')
        args = {
            k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()
        }
        ## embedded point zi
        z = list(
            model.extract_feature(self.feature, args, None, test_iter, N,
                                  self.xpu).values())[0]

        # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
        pp = 15
        tsne = TSNE(n_components=2,
                    perplexity=pp,
                    learning_rate=275,
                    init='pca',
                    random_state=0,
                    verbose=2,
                    method='exact')
        Z_tsne = tsne.fit_transform(z)

        # plot initial z
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        plot_embedding(Z_tsne,
                       y_tsne,
                       ax,
                       title="tsne with perplexity %d" % pp,
                       legend=True,
                       plotcolor=True)
        fig.savefig(fighome + os.sep + 'tsne_init_z' + str(self.znum) + '.pdf',
                    bbox_inches='tight')
        plt.close()

        # To initialize the cluster centers, we pass the data through
        # the initialized DNN to get embedded data points and then
        # perform standard k-means clustering in the feature space Z
        # to obtain k initial centroids {mu j}
        kmeans = KMeans(self.num_centers, n_init=20)
        kmeans.fit(z)
        args['dec_mu'][:] = kmeans.cluster_centers_

        ### KL DIVERGENCE MINIMIZATION. eq(2)
        # our model is trained by matching the soft assignment to the target distribution.
        # To this end, we define our objective as a KL divergence loss between
        # the soft assignments qi (pred) and the auxiliary distribution pi (label)
        solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)

        def ce(label, pred):
            return np.sum(label * np.log(label /
                                         (pred + 0.000001))) / label.shape[0]

        solver.set_metric(mx.metric.CustomMetric(ce))

        label_buff = np.zeros((X.shape[0], self.num_centers))
        train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff},
                                       batch_size=batch_size,
                                       shuffle=False,
                                       last_batch_handle='roll_over')
        self.y_pred = np.zeros((X.shape[0]))
        self.solvermetric = []
        self.ploti = 0
        fig = plt.figure(figsize=(20, 15))
        print('Batch_size = %d' % self.batch_size)
        print('update_interval = %d' % update_interval)
        print('tolerance = len(y_pred)/1000 = %f' %
              float(0.001 * self.y_pred.shape[0]))

        def refresh_unsuperv(i):
            if i % update_interval == 0:
                print('... Updating i = %d' % i)
                z = list(
                    model.extract_feature(self.feature, args, None, test_iter,
                                          N, self.xpu).values())[0]
                p = np.zeros((z.shape[0], self.num_centers))
                self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
                # the soft assignments qi (pred)
                y_pred = p.argmax(axis=1)
                print(np.std(np.bincount(y_pred)), np.bincount(y_pred))

                if y is not None:
                    # compare soft assignments with known labels (unused)
                    print(np.std(np.bincount(y.astype(int))),
                          np.bincount(y.astype(int)))
                    print(y_pred[0:5], y.astype(int)[0:5])
                    print('Clustering Acc = %f' % cluster_acc(y_pred, y)[0])
                    self.acci.append(cluster_acc(y_pred, y)[0])

                ## COMPUTING target distributions P
                ## pi is computed by raising qi to the second power and then
                ## normalizing by frequency per cluster (p currently holds q):
                weight = 1.0 / p.sum(axis=0)  # p.sum(axis=0) gives the cluster frequencies fj
                weight *= self.num_centers / weight.sum()
                p = (p**2) * weight
                train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
                print("sum(I(y'_prev != y)) = %f" % np.sum(y_pred != self.y_pred))
                self.solvermetric.append(solver.metric.get()[1])
                print("solver.metric = %f" % solver.metric.get()[1])

                # The DEC paper stops when fewer than tol = 0.1% of points change
                # cluster assignment between two consecutive intervals; the
                # check is left commented out here:
                # if np.sum(y_pred != self.y_pred) < 0.001 * y_pred.shape[0]:
                #     self.y_pred = y_pred
                #     return True

                self.y_pred = y_pred
                self.p = p
                self.z = z

            # to plot
            if i % plotting_interval == 0:
                if (self.ploti <= 15):
                    # Visualize the progression of the embedded representation in a subsample of data.
                    # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
                    tsne = TSNE(n_components=2,
                                perplexity=15,
                                learning_rate=275,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(self.z)

                    ax = fig.add_subplot(3, 4, 1 + self.ploti)
                    plot_embedding(Z_tsne,
                                   y_tsne,
                                   ax,
                                   title="Epoch %d z_tsne iter (%d)" %
                                   (self.ploti, i),
                                   legend=False,
                                   plotcolor=True)
                    self.ploti = self.ploti + 1

        # start solver
        solver.set_iter_start_callback(refresh_unsuperv)
        # monitor every self.batch_size
        solver.set_monitor(Monitor(self.batch_size))
        solver.solve(self.xpu, self.loss, args, self.args_grad, None,
                     train_iter, 0, 12 * N, {}, False)
        # finish
        fig = plt.gcf()
        fig.savefig(fighome + os.sep + 'tsne_progress_k' +
                    str(self.num_centers) + '_z' + str(self.znum) + '.pdf',
                    bbox_inches='tight')
        plt.close()

        # plot progression of clustering loss
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(range(len(self.solvermetric)), self.solvermetric, '-.')
        ax.set_xlabel("iter")
        ax.set_ylabel("L loss for num_centers =" + str(self.num_centers))
        fig.savefig(fighome + os.sep + 'clustering_loss_numcenters' +
                    str(self.num_centers) + '_z' + str(self.znum) + '.pdf',
                    bbox_inches='tight')
        plt.close()

        self.end_args = args
        outdict = {'p': self.p, 'z': self.z, 'y_pred': self.y_pred}

        return outdict
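
The tolerance-based stopping rule described in the comments (and left commented out in refresh_unsuperv) is easy to state on its own. A sketch, with the hypothetical helper name should_stop:

import numpy as np

def should_stop(y_pred, y_pred_prev, tol=0.001):
    # stop when fewer than tol (0.1%) of points changed cluster
    # assignment between two consecutive update intervals
    changed = np.sum(y_pred != y_pred_prev)
    return changed < tol * y_pred.shape[0]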
Example #13
0
    def cluster(self,
                X_train,
                y_dec_train,
                y_train,
                classes,
                batch_size,
                save_to,
                labeltype,
                update_interval=None):
        N = X_train.shape[0]
        self.best_args['update_interval'] = update_interval
        self.best_args['y_dec'] = y_dec_train
        self.best_args['roi_labels'] = y_train
        self.best_args['classes'] = classes
        self.best_args['batch_size'] = batch_size

        # with N examples, one training epoch corresponds to
        # N / batch_size solver iterations
        test_iter = mx.io.NDArrayIter({'data': X_train},
                                      batch_size=N,
                                      shuffle=False,
                                      last_batch_handle='pad')
        args = {
            k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()
        }
        ## embedded point zi
        z = list(
            model.extract_feature(self.feature, args, None, test_iter, N,
                                  self.xpu).values())[0]

        # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
        self.perplexity = 15
        self.learning_rate = 125
        # reconstruct the wordy label names (named_y) from the class indices
        named_y = [classes[kc] for kc in y_dec_train]
        self.best_args['named_y'] = named_y

        # To initialize the cluster centers, we pass the data through
        # the initialized DNN to get embedded data points and then
        # perform standard k-means clustering in the feature space Z
        # to obtain k initial centroids {mu j}
        kmeans = KMeans(self.best_args['num_centers'], n_init=20)
        kmeans.fit(z)
        args['dec_mu'][:] = kmeans.cluster_centers_

        ### KL DIVERGENCE MINIMIZATION. eq(2)
        # our model is trained by matching the soft assignment to the target distribution.
        # To this end, we define our objective as a KL divergence loss between
        # the soft assignments qi (pred) and the auxiliary distribution pi (label)
        solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)
        # optionally anneal: lr_scheduler=mx.misc.FactorScheduler(20 * update_interval, 0.4)

        def ce(label, pred):
            return np.sum(label * np.log(label /
                                         (pred + 0.000001))) / label.shape[0]

        solver.set_metric(mx.metric.CustomMetric(ce))

        label_buff = np.zeros(
            (X_train.shape[0], self.best_args['num_centers']))
        train_iter = mx.io.NDArrayIter({'data': X_train},
                                       {'label': label_buff},
                                       batch_size=self.best_args['batch_size'],
                                       shuffle=False,
                                       last_batch_handle='roll_over')
        self.best_args['y_pred'] = np.zeros((X_train.shape[0]))
        self.best_args['acci'] = []
        self.best_args['bestacci'] = []
        self.ploti = 0
        figprogress = plt.figure(figsize=(20, 15))
        print('Batch_size = %d' % self.best_args['batch_size'])
        print('update_interval = %d' % update_interval)
        self.best_args['plot_interval'] = int(20 * update_interval)
        print('plot_interval = %d' % self.best_args['plot_interval'])
        self.maxAcc = 0.0

        def refresh(i):  # callback invoked at the start of every solver iteration
            if i % self.best_args['update_interval'] == 0:
                z = list(
                    model.extract_feature(self.feature, args, None, test_iter,
                                          N, self.xpu).values())[0]

                p = np.zeros((z.shape[0], self.best_args['num_centers']))
                self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
                # the soft assignments qi (pred)
                y_pred = p.argmax(axis=1)
                print(np.std(np.bincount(y_dec_train)),
                      np.bincount(y_dec_train))
                print(np.std(np.bincount(y_pred)), np.bincount(y_pred))

                #####################
                # Z-space CV RF classifier METRICS
                #####################
                # compare soft assignments with known labels (only B or M)
                print('\n... Updating i = %d' % i)
                datalabels = np.asarray(y_train)
                # classify in the joint space of embeddings z and soft assignments p
                dataZspace = np.concatenate((z, p), axis=1)
                Xdata = dataZspace[datalabels != 'K', :]
                ydatalabels = datalabels[datalabels != 'K']
                RFmodel = RandomForestClassifier(n_jobs=2,
                                                 n_estimators=500,
                                                 random_state=0,
                                                 verbose=0)
                # evaluate by cross-validation (cv=5 stratified folds)
                scores_BorM = cross_val_score(RFmodel,
                                              Xdata,
                                              ydatalabels,
                                              cv=5)
                # compute Z-space Accuracy
                Acc = scores_BorM.mean()
                print "cvRF BorM Accuracy = %f " % Acc
                print scores_BorM.tolist()

                # use only the filledbyBC examples (the first 202 examples)
                nme_dist_label = [
                    lab.split('_')[1]
                    for lab in self.best_args['named_y'][0:202]
                ]
                nme_intenh_label = [
                    lab.split('_')[2]
                    for lab in self.best_args['named_y'][0:202]
                ]
                # compute Z-space Accuracy
                scores_dist = cross_val_score(RFmodel,
                                              z[0:202],
                                              nme_dist_label,
                                              cv=5)
                print "cvRF nme_dist Accuracy = %f " % scores_dist.mean()
                scores_intenh = cross_val_score(RFmodel,
                                                z[0:202],
                                                nme_intenh_label,
                                                cv=5)
                print "cvRF nme_intenh Accuracy = %f " % scores_intenh.mean()

                #####################
                # CALCULATE 5nn METRICS
                #####################
                labels = np.asarray(self.best_args['named_y'])
                Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=4)
                # each query below returns the k=2 closest points (the point itself plus its nearest neighbor)
                nme_dist = [
                    'Diffuse', 'Focal', 'Linear', 'MultipleRegions',
                    'Regional', 'Segmental'
                ]
                nme_intenh = [
                    'Clumped', 'ClusteredRing', 'Heterogeneous', 'Homogeneous',
                    'Stippled or punctate'
                ]
                wnme_dist = np.zeros((len(nme_dist), len(nme_dist)),
                                     dtype=np.int64)
                wnme_intenh = np.zeros((len(nme_intenh), len(nme_intenh)),
                                       dtype=np.int64)
                BorM_diag = []
                TP = []
                TN = []
                FP = []
                FN = []
                missed = []
                NME_descript_dist = []
                NME_descript_intenh = []
                # count statistics over all embedded points
                for k in range(z.shape[0]):
                    iclass = labels[k]
                    dist, ind = Z_embedding_tree.query([z[k]], k=2)
                    dist5nn, ind5nn = dist[k != ind], ind[k != ind]
                    class5nn = labels[ind5nn]
                    # compute DIAGNOSTIC ACC based on nme similar local neighborhood
                    if (iclass.split('_')[0] != 'K'):
                        predBorM = []
                        predBorM.append(
                            sum([lab.split('_')[0] == 'M'
                                 for lab in class5nn]))
                        predBorM.append(
                            sum([lab.split('_')[0] == 'B'
                                 for lab in class5nn]))
                        predBorM.append(
                            sum([lab.split('_')[0] == 'K'
                                 for lab in class5nn]))
                        pred_BorM = [
                            ['M', 'B', 'K'][l] for l, pc in enumerate(predBorM)
                            if pc >= max(predBorM) and max(predBorM) > 0
                        ][0]
                        # count a hit only when the majority vote is not 'K'
                        if (pred_BorM != 'K'):
                            if (iclass.split('_')[0] == pred_BorM):
                                BorM_diag.append(1)
                                # confusion table entries
                                if (iclass.split('_')[0] == 'M'):
                                    TP.append(1)
                                if (iclass.split('_')[0] == 'B'):
                                    TN.append(1)
                            if (iclass.split('_')[0] != pred_BorM):
                                if (iclass.split('_')[0] == 'M'):
                                    FN.append(1)
                                if (iclass.split('_')[0] == 'B'):
                                    FP.append(1)
                        else:
                            missed.append(1)

                    if (k <= 202 and iclass.split('_')[1] != 'N/A'):
                        # increment detections for final NME descriptor accuracy
                        prednmed = []
                        prednmed.append(
                            sum([
                                lab.split('_')[1] == 'Diffuse'
                                for lab in class5nn
                            ]))
                        prednmed.append(
                            sum([
                                lab.split('_')[1] == 'Focal'
                                for lab in class5nn
                            ]))
                        prednmed.append(
                            sum([
                                lab.split('_')[1] == 'Linear'
                                for lab in class5nn
                            ]))
                        prednmed.append(
                            sum([
                                lab.split('_')[1] == 'MultipleRegions'
                                for lab in class5nn
                            ]))
                        prednmed.append(
                            sum([
                                lab.split('_')[1] == 'Regional'
                                for lab in class5nn
                            ]))
                        prednmed.append(
                            sum([
                                lab.split('_')[1] == 'Segmental'
                                for lab in class5nn
                            ]))
                        # prediction based on majority voting
                        pred_nme_dist = [
                            nme_dist[l] for l, pc in enumerate(prednmed)
                            if pc >= max(prednmed) and max(prednmed) > 0
                        ]
                        # compute NME ACC based on nme similar local neighborhood
                        if (iclass.split('_')[1] in pred_nme_dist):
                            NME_descript_dist.append(1)

                    if (k <= 202 and iclass.split('_')[2] != 'N/A'):
                        prednmeie = []
                        prednmeie.append(
                            sum([
                                lab.split('_')[2] == 'Clumped'
                                for lab in class5nn
                            ]))
                        prednmeie.append(
                            sum([
                                lab.split('_')[2] == 'ClusteredRing'
                                for lab in class5nn
                            ]))
                        prednmeie.append(
                            sum([
                                lab.split('_')[2] == 'Heterogeneous'
                                for lab in class5nn
                            ]))
                        prednmeie.append(
                            sum([
                                lab.split('_')[2] == 'Homogeneous'
                                for lab in class5nn
                            ]))
                        prednmeie.append(
                            sum([
                                lab.split('_')[2] == 'Stippled or punctate'
                                for lab in class5nn
                            ]))
                        # prediction based on majority voting
                        pred_nme_intenh = [
                            nme_intenh[l] for l, pc in enumerate(prednmeie)
                            if pc >= max(prednmeie) and max(prednmeie) > 0
                        ]
                        # compute NME ACC based on nme similar local neighborhood
                        if (iclass.split('_')[2] in pred_nme_intenh):
                            NME_descript_intenh.append(1)

                #####################
                # collect stats
                #####################
                BorM_diag_Acc = sum(BorM_diag) / float(len(datalabels))
                print("BorM_diag_Acc = %f " % BorM_diag_Acc)
                ## good if high
                TPR = sum(TP) / float(sum(datalabels == 'M'))
                print("TPR = %f " % TPR)
                TNR = sum(TN) / float(sum(datalabels == 'B'))
                print("TNR = %f " % TNR)
                ## bad if high
                FPR = sum(FP) / float(sum(datalabels == 'B'))
                print("FPR = %f " % FPR)
                FNR = sum(FN) / float(sum(datalabels == 'M'))
                print("FNR = %f " % FNR)
                # good if it decreases
                missedR = sum(np.asarray(missed)) / float(len(datalabels))
                print("missedR = %f " % missedR)

                Acc5nn_nme_dist = sum(NME_descript_dist) / 202.0
                Acc5nn_nme_intenh = sum(NME_descript_intenh) / 202.0
                print("Acc5nn_nme_dist (DIST) = %f " % Acc5nn_nme_dist)
                print("Acc5nn_nme_intenh (INT_ENH) = %f " % Acc5nn_nme_intenh)

                if (i == 0):
                    tsne = TSNE(n_components=2,
                                perplexity=self.perplexity,
                                learning_rate=self.learning_rate,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(z)
                    self.best_args['initAcc'] = Acc
                    # plot initial z
                    figinint = plt.figure()
                    axinint = figinint.add_subplot(1, 1, 1)
                    plot_embedding_unsuper_NMEdist_intenh(
                        Z_tsne,
                        named_y,
                        axinint,
                        title=
                        'kmeans init tsne: Acc={}\n RF_nme_dist={}\n RF_intenh={}'
                        .format(Acc, scores_dist.mean(), scores_intenh.mean()),
                        legend=True)
                    figinint.savefig(os.path.join(
                        save_to, 'tsne_init_z{}_mu{}_{}.pdf'.format(
                            self.best_args['znum'],
                            self.best_args['num_centers'], labeltype)),
                        bbox_inches='tight')
                    plt.close()

                # save best args
                self.best_args['acci'].append(Acc)
                if (Acc >= self.maxAcc):
                    print('Improving maxAcc = {}'.format(Acc))
                    for key, v in args.items():
                        self.best_args[key] = args[key]

                    self.maxAcc = Acc
                    self.best_args['pbestacci'] = p
                    self.best_args['zbestacci'] = z
                    self.best_args['bestacci'].append(Acc)
                    self.best_args['cvRF_nme_dist'] = scores_dist.mean()
                    self.best_args['cvRF_nme_intenh'] = scores_intenh.mean()
                    self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()
                    self.best_args['BorM_diag_Acc'] = BorM_diag_Acc
                    self.best_args['TPR'] = TPR
                    self.best_args['TNR'] = TNR
                    self.best_args['FPR'] = FPR
                    self.best_args['FNR'] = FNR
                    self.best_args['missedR'] = missedR
                    self.best_args['Acc5nn_nme_dist'] = Acc5nn_nme_dist
                    self.best_args['Acc5nn_nme_intenh'] = Acc5nn_nme_intenh

                if (i > 0 and i % self.best_args['plot_interval'] == 0
                        and self.ploti <= 15):
                    # Visualize the progression of the embedded representation in a subsample of data.
                    # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi.
                    tsne = TSNE(n_components=2,
                                perplexity=self.perplexity,
                                learning_rate=self.learning_rate,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(z)
                    axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti)
                    plot_embedding_unsuper_NMEdist_intenh(
                        Z_tsne,
                        named_y,
                        axprogress,
                        title="Epoch %d z_tsne Acc (%f)" % (i, Acc),
                        legend=False)
                    self.ploti = self.ploti + 1

                ## COMPUTING target distributions P
                ## pi is computed by raising qi to the second power and then
                ## normalizing by frequency per cluster (p currently holds q):
                weight = 1.0 / p.sum(axis=0)  # p.sum(axis=0) gives the cluster frequencies fj
                weight *= self.best_args['num_centers'] / weight.sum()
                p = (p**2) * weight
                # write the new targets into the iterator's label buffer
                train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
                # changed assignments vs. the 0.1% tolerance threshold
                print(np.sum(y_pred != self.best_args['y_pred']),
                      0.001 * y_pred.shape[0])

                # The DEC paper stops when fewer than tol = 0.1% of points change
                # cluster assignment between two consecutive intervals; here a
                # fixed iteration cap is used instead.
                if i == self.best_args['update_interval'] * 100:
                    self.best_args['y_pred'] = y_pred
                    self.best_args['acci'].append(Acc)
                    return True

                self.best_args['y_pred'] = y_pred

        # start solver
        solver.set_iter_start_callback(refresh)
        solver.set_monitor(Monitor(20))
        solver.solve(self.xpu, self.loss, args, self.args_grad, None,
                     train_iter, 0, 1000000000, {}, False)
        self.end_args = args
        self.best_args['end_args'] = args

        # finish
        figprogress = plt.gcf()
        figprogress.savefig(os.path.join(
            save_to, 'tsne_progress_z{}_mu{}_{}.pdf'.format(
                self.best_args['znum'], self.best_args['num_centers'],
                labeltype)),
            bbox_inches='tight')
        plt.close()

        # plot final z
        figfinal = plt.figure()
        axfinal = figfinal.add_subplot(1, 1, 1)
        tsne = TSNE(n_components=2,
                    perplexity=self.perplexity,
                    learning_rate=self.learning_rate,
                    init='pca',
                    random_state=0,
                    verbose=2,
                    method='exact')
        Z_tsne = tsne.fit_transform(self.best_args['zbestacci'])
        plot_embedding_unsuper_NMEdist_intenh(
            Z_tsne,
            self.best_args['named_y'],
            axfinal,
            title='final tsne: Acc={}\n RF_nme_dist={}\n RF_intenh={}'.format(
                self.best_args['bestacci'][-1],
                self.best_args['cvRF_nme_dist'],
                self.best_args['cvRF_nme_intenh']),
            legend=True)
        figfinal.savefig(os.path.join(
            save_to, 'tsne_final_z{}_mu{}_{}.pdf'.format(
                self.best_args['znum'], self.best_args['num_centers'],
                labeltype)),
            bbox_inches='tight')
        plt.close()

        outdict = {
            'initAcc': self.best_args['initAcc'],
            'acci': self.best_args['acci'],
            'bestacci': self.best_args['bestacci'],
            'pbestacci': self.best_args['pbestacci'],
            'zbestacci': self.best_args['zbestacci'],
            'dec_mubestacci': self.best_args['dec_mu'],
            'cvRF_nme_dist': self.best_args['cvRF_nme_dist'],
            'cvRF_nme_intenh': self.best_args['cvRF_nme_intenh'],
            'BorM_diag_Acc': self.best_args['BorM_diag_Acc'],
            'TPR': self.best_args['TPR'],
            'TNR': self.best_args['TNR'],
            'FPR': self.best_args['FPR'],
            'FNR': self.best_args['FNR'],
            'missedR': self.best_args['missedR'],
            'Acc5nn_nme_dist': self.best_args['Acc5nn_nme_dist'],
            'Acc5nn_nme_intenh': self.best_args['Acc5nn_nme_intenh'],
            'y_pred': self.best_args['y_pred'],
            'named_y': self.best_args['named_y'],
            'classes': self.best_args['classes'],
            'num_centers': self.best_args['num_centers'],
            'znum': self.best_args['znum'],
            'update_interval': self.best_args['update_interval'],
            'batch_size': self.best_args['batch_size']
        }
        return outdict
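
The leave-one-out nearest-neighbor accuracy computed inline in the refresh callbacks can be factored into a reusable helper. The sketch below (the name knn_majority_accuracy is hypothetical) mirrors the inline logic in simplified form; the original additionally breaks ties and tallies TP/TN per class:

import numpy as np
from sklearn.neighbors import BallTree

def knn_majority_accuracy(z, labels, k=5, exclude='K'):
    # leave-one-out k-NN majority vote in the embedded space Z
    labels = np.asarray(labels)
    tree = BallTree(z, leaf_size=5)
    correct, counted = 0, 0
    for i in range(z.shape[0]):
        _, ind = tree.query([z[i]], k=k + 1)  # k+1: the query point itself is returned too
        neigh = labels[ind[0][ind[0] != i]]   # drop the point itself
        neigh = neigh[neigh != exclude]       # ignore the unknown class
        if len(neigh) == 0:
            continue
        vals, counts = np.unique(neigh, return_counts=True)
        correct += int(vals[np.argmax(counts)] == labels[i])
        counted += 1
    return correct / float(counted) if counted else 0.0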