Example No. 1
def nd_f1(pred, label, num_class, average="micro"):
    """Evaluate F1 using mx.nd.NDArray

    Parameters
    ----------
    pred : nd.NDArray
        Shape (num, label_num) or (num,)
    label : nd.NDArray
        Shape (num, label_num) or (num,)
    num_class : int
    average : str

    Returns
    -------
    f1 : float
    """
    if pred.dtype != np.float32:
        pred = pred.astype(np.float32)
        label = label.astype(np.float32)
    assert num_class > 1
    assert pred.ndim == label.ndim
    if num_class == 2 and average == "micro":
        tp = nd.sum((pred == 1) * (label == 1)).asscalar()
        fp = nd.sum((pred == 1) * (label == 0)).asscalar()
        fn = nd.sum((pred == 0) * (label == 1)).asscalar()
        precision = float(tp) / (tp + fp)
        recall = float(tp) / (tp + fn)
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        assert num_class is not None
        pred_onehot = nd.one_hot(indices=pred, depth=num_class)
        label_onehot = nd.one_hot(indices=label, depth=num_class)
        tp = pred_onehot * label_onehot
        fp = pred_onehot * (1 - label_onehot)
        fn = (1 - pred_onehot) * label_onehot
        if average == "micro":
            tp = nd.sum(tp).asscalar()
            fp = nd.sum(fp).asscalar()
            fn = nd.sum(fn).asscalar()
            precision = float(tp) / (tp + fp)
            recall = float(tp) / (tp + fn)
            f1 = 2 * (precision * recall) / (precision + recall)
        elif average == "macro":
            if tp.ndim == 3:
                tp = nd.sum(tp, axis=(0, 1))
                fp = nd.sum(fp, axis=(0, 1))
                fn = nd.sum(fn, axis=(0, 1))
            else:
                tp = nd.sum(tp, axis=0)
                fp = nd.sum(fp, axis=0)
                fn = nd.sum(fn, axis=0)
            precision = nd.mean(tp / (tp + fp)).asscalar()
            recall = nd.mean(tp / (tp + fn)).asscalar()
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            raise NotImplementedError
    return f1
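A minimal call sketch for the nd_f1 helper above (not part of the original example; it assumes `import numpy as np` and `from mxnet import nd` are in scope, as the snippet itself does):

import numpy as np               # nd_f1 checks against np.float32 internally
from mxnet import nd

pred = nd.array([0, 1, 2, 2, 1])    # predicted class ids
label = nd.array([0, 1, 1, 2, 1])   # ground-truth class ids
print(nd_f1(pred, label, num_class=3, average="micro"))  # one global F1 over all classes
print(nd_f1(pred, label, num_class=3, average="macro"))  # macro average of per-class precision/recall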
Example No. 2
def evaluate_accuracy(data_iterator, num_examples, batch_size, params, net,
                      pool_type, pool_size, pool_stride, act_type, dilate_size,
                      nf):
    numerator = 0.
    denominator = 0.
    for batch_i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
        label = label.as_in_context(ctx)
        label_one_hot = nd.one_hot(label, 10)
        output, _ = net(data,
                        params,
                        pool_type=pool_type,
                        pool_size=pool_size,
                        pool_stride=pool_stride,
                        act_type=act_type,
                        dilate_size=dilate_size,
                        nf=nf)
        predictions = nd.argmax(output, axis=1)
        numerator += nd.sum(predictions == label)
        denominator += data.shape[0]
        print('Evaluating accuracy. (complete percent: %.2f/100' %
              (1.0 * batch_i / (num_examples // batch_size) * 100) + ')',
              end='')
        sys.stdout.write("\r")
    return (numerator / denominator).asscalar()
    def generate_learned_samples(self):
        '''
        Draw and generate data.

        Returns:
            `Tuple` data. The shape is ...
            - `mxnet.ndarray` of observed data points in training.
            - `mxnet.ndarray` of supervised data in training.
            - `mxnet.ndarray` of observed data points in test.
            - `mxnet.ndarray` of supervised data in test.
        '''
        for _ in range(self.iter_n):
            training_batch_arr, test_batch_arr = None, None
            training_label_arr, test_label_arr = None, None
            row_arr = np.arange(self.__train_observed_arr.shape[0])
            np.random.shuffle(row_arr)

            training_batch_arr = self.__train_observed_arr[
                row_arr[:self.batch_size]]
            training_batch_arr = mx.ndarray.array(training_batch_arr,
                                                  ctx=self.__ctx)
            training_batch_arr = self.pre_normalize(training_batch_arr)

            label_key_arr = self.__train_label_arr[row_arr[:self.batch_size]]
            label_key_arr = mx.ndarray.array(label_key_arr, ctx=self.__ctx)
            training_label_arr = nd.one_hot(label_key_arr, self.__label_n)

            test_row_arr = np.arange(self.__test_observed_arr.shape[0])
            np.random.shuffle(test_row_arr)

            test_batch_arr = self.__test_observed_arr[
                test_row_arr[:self.batch_size]]
            test_batch_arr = mx.ndarray.array(test_batch_arr, ctx=self.__ctx)
            test_batch_arr = self.pre_normalize(test_batch_arr)

            test_label_key_arr = self.__test_label_arr[
                test_row_arr[:self.batch_size]]
            test_label_key_arr = mx.ndarray.array(test_label_key_arr,
                                                  ctx=self.__ctx)
            test_label_arr = nd.one_hot(test_label_key_arr, self.__label_n)

            if self.__noiseable_data is not None:
                training_batch_arr = self.__noiseable_data.noise(
                    training_batch_arr)

            yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr
Example No. 4
    def forward(self, pred, label):
        label = nd.one_hot(label, self.nclass)
        alpha_p = nd.relu(self.op - pred)
        alpha_n = nd.relu(pred - self.on)

        pred = (label * (alpha_p * (pred - self.delta_p)) + (1-label) * (alpha_n * (pred - self.delta_n))) * self.scale

        return self.loss(pred, label)
Example No. 5
 def sample_v_given_h(self, h0):
     v1_prob = self.propdown(h0).reshape([-1, self.n_val])
     v1_prob = nd.softmax(v1_prob)
     v1_args = nd.sample_multinomial(v1_prob)
     v1 = nd.one_hot(v1_args, self.n_val)
     return [
         v1_prob.reshape([-1, self.n_node]),
         v1.reshape([-1, self.n_node])
     ]
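The sampling pattern above (softmax probabilities -> nd.sample_multinomial -> nd.one_hot) can be checked in isolation; a small sketch, assuming only mxnet is installed:

from mxnet import nd

prob = nd.softmax(nd.random.normal(shape=(4, 3)))  # 4 rows of 3-way probabilities
idx = nd.sample_multinomial(prob)                  # shape (4,): one sampled index per row
onehot = nd.one_hot(idx, 3)                        # shape (4, 3): sampled states as one-hot rows
print(idx)
print(onehot)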
def get_minibatch(data_iter):
    try:
        batch = data_iter.next()
    except StopIteration:
        data_iter.reset()
        batch = data_iter.next()

    x = batch.data[0]
    x = nd.reshape(x, (x.shape[0], -1))
    y = nd.one_hot(batch.label[0], 10)
    return x, y
Example No. 7
def evaluate_accuracy(data_iterator, net):
    numerator = 0.
    denominator = 0.
    loss_avg = 0.
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx).reshape((-1, 784))
        label = label.as_in_context(ctx)
        label_one_hot = nd.one_hot(label, 10)
        output = net(data)
        loss = cross_entropy(output, label_one_hot)
        predictions = nd.argmax(output, axis=1)
        numerator += nd.sum(predictions == label)
        denominator += data.shape[0]
        loss_avg = loss_avg * i / (i + 1) + nd.mean(loss).asscalar() / (i + 1)
    return (numerator / denominator).asscalar(), loss_avg
Example No. 8
 def loss(self, data, label, train=True):
     data = data.as_in_context(ctx).reshape((data.shape[0],self.num_channel,1,-1))
     label = label.as_in_context(ctx)
     label_one_hot = nd.one_hot(label, self.model.output_dim)
     
     if train:
         with autograd.record():
             output, _= self.model.network(X=data)
             loss = softmax_cross_entropy(output, label_one_hot)
         loss.backward()
         return loss
     else:
         output, _ = self.model.network(X=data)
         loss = softmax_cross_entropy(output, label_one_hot)
         return loss, output
Example No. 9
def plot(netG):
    num_image = 8
    for i in range(num_image):
        latent_z = mx.nd.random_normal(0,
                                       1,
                                       shape=(1, latent_z_size, 1, 1),
                                       ctx=ctx)
        y_z = mx.nd.array(np.random.randint(0, 10, size=1), ctx=ctx)  # random class id in 0..9
        y_z = nd.one_hot(y_z, depth=10)

        img = netG(latent_z, y_z)

        plt.subplot(2, 4, i + 1)
        visualize(img[0])
    plt.show()
Example No. 10
 def validate(self):
     self.val_iter.reset()
     val_metrics = self.get_metrics()
     val_loss, val_num = 0, 0
     for i, batch in enumerate(self.val_iter):
         data, label = unpack_batch(batch, self.context)
         output = [self.net(x) for x in data]
         loss = [
             loss_func(o, nd.one_hot(l, self.K))
             for (o, l) in zip(output, label)
         ]
         val_loss += np.sum([nd.sum(L).asnumpy() for L in loss])
         val_num += batch.data[0].shape[0]
         for val_metric in val_metrics:
             val_metric.update(label, output)
     return val_loss / val_num, val_metrics
Example No. 11
    def predict_nd(self):

        self._random_data()
        
        if not self.oldversion:
            self._reset_noise()
        
        prob_list = []
        label_list = []
        for batch_i, (data, label) in enumerate(self.test_data):

            data = data.as_in_context(ctx).reshape((data.shape[0],self.num_channel,1,-1))
            label = label.as_in_context(ctx)
            label = nd.one_hot(label, self.model.output_dim).asnumpy()[:,1].tolist()
            output, _ = self.model.network(X=data)
            prob = transform_softmax(output)[:,1].asnumpy().tolist()
            prob_list.extend(prob)
            label_list.extend(label)
        return prob_list, label_list, output
Example No. 12
    def load_enum_states(self, fn):
        with open(fn, 'r') as fp:
            lines = fp.readlines()
        n_states = int(lines[0])
        dat_lst = []

        self.prob_states = nd.zeros([n_states], ctx=self.ctx)

        for i in range(1, n_states + 1):
            es = lines[i].strip().split()
            for v in range(self.n_vis):
                dat_lst.append(int(es[0][v]))
            self.prob_states[i - 1] = float(es[1])
            if self.prob_states[i - 1] < 1e-10:
                self.prob_states[i - 1] = 1e-10

        dat_lst = nd.array(dat_lst)

        self.enum_states = nd.one_hot(dat_lst, self.n_val).reshape(
            [-1, self.n_vis * self.n_val]).copyto(self.ctx)
        sys.stderr.write("Exact states info loaded!\n")
        return
Example No. 13
def get_data(fn, n_vis, n_val):
    if fn.isdigit():
        #random
        num_data = int(fn)
        prob = nd.ones([num_data * n_vis, n_val]) / n_val
        dat_lst = nd.sample_multinomial(prob)
        sys.stderr.write("Generating random data: nv= %d, nd= %d\n" %
                         (n_vis, num_data))
    else:
        #read from file
        with open(fn, 'r') as fp:
            lines = fp.readlines()
            es = lines[0].split()
            nl = int(es[0])
            nv = int(es[1])
            dat_lst = []
            sys.stderr.write("Loading data: nv= %d, nd= %d\n" % (nv, nl))
            for l in range(1, nl + 1):
                for i in range(nv):
                    dat_lst.append(int(lines[l][i]))
            dat_lst = nd.array(dat_lst)
    data = nd.one_hot(dat_lst, n_val).reshape([-1, n_vis * n_val])
    return data
def get_input_data(data,vocab_size):
    return [nd.one_hot(X,vocab_size).asnumpy() for X in data]
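A small shape check for get_input_data above (hypothetical inputs; assumes `from mxnet import nd`):

from mxnet import nd

vocab_size = 5
data = [nd.array([0, 2, 4]), nd.array([1, 1, 3])]   # two time steps of token ids
inputs = get_input_data(data, vocab_size)
print(len(inputs), inputs[0].shape)                  # 2 steps, each a (3, 5) numpy one-hot matrix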
Example No. 15
 def forward(self, x):
     x = nd.one_hot(x, self.vsize)
     self.h1 = rnn(x, self.h1, self.W1, self.b1)
     return nd.dot(self.h1, self.Wy) + self.by
Example No. 16
epochs = 1000
moving_loss = 0
niter = 0
l2_strength = .1

loss_seq_train = []
loss_seq_test = []
acc_seq_train = []
acc_seq_test = []

for e in range(epochs):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        label_one_hot = nd.one_hot(label, 10)
        with autograd.record():
            output = net(data)
            loss = cross_entropy(
                output, label_one_hot) + l2_strength * penalty_l2(params)
        loss.backward()
        SGD(params, 0.001)

        niter += 1
        moving_loss = 0.99 * moving_loss + .01 * nd.sum(loss).asscalar()
        est_loss = moving_loss / (1 - 0.99 ** niter)

    test_accuracy, test_loss = evaluate_accuracy(test_data, net)
    train_accuracy, train_loss = evaluate_accuracy(train_data, net)

    # save them for later
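The moving_loss update in the loop above is the usual bias-corrected exponential moving average (the denominator uses 0.99**niter); a standalone sketch of the same bookkeeping, with made-up loss values:

moving_loss, niter = 0.0, 0
for raw_loss in [2.3, 2.1, 1.9, 1.7]:             # pretend per-batch losses
    niter += 1
    moving_loss = 0.99 * moving_loss + 0.01 * raw_loss
    est_loss = moving_loss / (1 - 0.99 ** niter)   # corrects the zero-initialization bias
    print(niter, round(est_loss, 4))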
Example No. 17
def GRU(epoch = 100 , batch_size=100, save_period=100 , load_period=100 ,learning_rate= 0.1, ctx=mx.gpu(0)):

    train_data , test_data = FashionMNIST(batch_size)

    #network parameter
    time_step = 28
    num_inputs = 28
    num_hidden = 200
    num_outputs = 10

    path = "weights/FashionMNIST_GRUweights-{}".format(load_period)

    if os.path.exists(path):

        print("loading weights")
        [wxz, wxr, wxh, whz, whr, whh, bz, br, bh, why, by] = nd.load(path)  # weights load
        wxz = wxz.as_in_context(ctx)
        wxr = wxr.as_in_context(ctx)
        wxh = wxh.as_in_context(ctx)

        whz = whz.as_in_context(ctx)
        whr = whr.as_in_context(ctx)
        whh = whh.as_in_context(ctx)

        bz = bz.as_in_context(ctx)
        br = br.as_in_context(ctx)
        bh = bh.as_in_context(ctx)

        why = why.as_in_context(ctx)
        by = by.as_in_context(ctx)
        params = [wxz , wxr , wxh , whz, whr, whh, bz, br, bh, why , by]

    else:
        print("initializing weights")

        with ctx:
            wxz = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_inputs))
            wxr = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_inputs))
            wxh = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_inputs))

            whz = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_hidden))
            whr = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_hidden))
            whh = nd.random.normal(loc=0, scale=0.01, shape=(num_hidden, num_hidden))

            bz = nd.random.normal(loc=0,scale=0.01,shape=(num_hidden,))
            br = nd.random.normal(loc=0,scale=0.01,shape=(num_hidden,))
            bh = nd.random.normal(loc=0,scale=0.01,shape=(num_hidden,))

            why = nd.random.normal(loc=0,scale=0.1,shape=(num_outputs , num_hidden))
            by = nd.random.normal(loc=0,scale=0.1,shape=(num_outputs,))

        params = [wxz , wxr , wxh , whz, whr, whh, bz, br, bh, why , by]

    # attach gradient!!!
    for param in params:
        param.attach_grad()

    # GRU cell: apply the gated-recurrent update over each time step of the input
    def GRU_Cell(input, state):
        for x in input:
            z_t = nd.Activation(nd.FullyConnected(data=x,weight=wxz,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=state,weight=whz,no_bias=True,num_hidden=num_hidden)+bz,act_type="sigmoid")
            r_t = nd.Activation(nd.FullyConnected(data=x,weight=wxr,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=state,weight=whr,no_bias=True,num_hidden=num_hidden)+br,act_type="sigmoid")
            g_t = nd.Activation(nd.FullyConnected(data=x,weight=wxh,no_bias=True,num_hidden=num_hidden)+
                                nd.FullyConnected(data=r_t*state,weight=whh,no_bias=True,num_hidden=num_hidden)+bh,act_type="tanh")

            state = nd.multiply(z_t,state) + nd.multiply(1-z_t,g_t)

        output = nd.FullyConnected(data=state, weight=why, bias=by, num_hidden=num_outputs)
        output = nd.softmax(data=output)
        return output, state

    def cross_entropy(output, label):
        return - nd.sum(label * nd.log(output), axis=0 , exclude=True)

    #Adam optimizer
    state=[]
    optimizer=mx.optimizer.Adam(rescale_grad=1,learning_rate=learning_rate)

    for param in params:
        state.append(optimizer.create_state(0,param))

    for i in tqdm(range(1,epoch+1,1)):

        for data,label in train_data:

            states = nd.zeros(shape=(data.shape[0], num_hidden), ctx=ctx)
            data = data.as_in_context(ctx)
            data = data.reshape(shape=(-1,time_step,num_inputs))
            data=nd.transpose(data=data,axes=(1,0,2))
            label = label.as_in_context(ctx)
            label = nd.one_hot(label , num_outputs)

            with autograd.record():
                outputs, states = GRU_Cell(data, states)
                loss = cross_entropy(outputs,label) # (batch_size,)
            loss.backward()

            cost = nd.mean(loss).asscalar()
            for j,param in enumerate(params):
                optimizer.update(0,param,param.grad,state[j])

        test_accuracy = evaluate_accuracy(test_data, time_step, num_inputs, num_hidden, GRU_Cell, ctx)
        print(" epoch : {} , last batch cost : {}".format(i,cost))
        print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))

        #weight_save
        if i % save_period==0:
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            nd.save("weights/FashionMNIST_GRUweights-{}".format(i),params)

    test_accuracy = evaluate_accuracy(test_data, time_step, num_inputs, num_hidden, GRU_Cell, ctx)
    print("Test_acc : {0:0.3f}%".format(test_accuracy * 100))
    return "optimization completed"
Example No. 18
def Train(train,
          test,
          Debug,
          batch_size,
          lr,
          smoothing_constant,
          num_fc1,
          num_fc2,
          num_outputs,
          epochs,
          SNR,
          sl,
          pool_type,
          pool_size,
          pool_stride,
          params_init=None,
          period=None):

    num_examples = train.shape[0]
    # Convert the training-set data to NDArray
    y = nd.array(~train.sigma.isnull() + 0)
    X = nd.array(
        Normolise(
            train.drop([
                'mass', 'positions', 'gaps', 'max_peak', 'sigma', 'SNR_mf',
                'SNR_mf0'
            ],
                       axis=1)))
    print('Label for training:', y.shape)
    print('Dataset for training:', X.shape, end='\n\n')

    dataset_train = gluon.data.ArrayDataset(X, y)
    train_data = gluon.data.DataLoader(dataset_train,
                                       batch_size,
                                       shuffle=True,
                                       last_batch='discard')

    y = nd.array(~test.sigma.isnull() + 0)
    X = nd.array(
        Normolise(
            test.drop([
                'mass', 'positions', 'gaps', 'max_peak', 'sigma', 'SNR_mf',
                'SNR_mf0'
            ],
                      axis=1)))
    print('Label for testing:', y.shape)
    print('Dataset for testing:', X.shape, end='\n\n')

    # Use the data module to read the data and build the test set. (no shuffle)
    dataset_test = gluon.data.ArrayDataset(X, y)
    test_data = gluon.data.DataLoader(dataset_test,
                                      batch_size,
                                      shuffle=True,
                                      last_batch='discard')

    # Train
    loss_history = []
    loss_v_history = []
    moving_loss_history = []
    test_accuracy_history = []
    train_accuracy_history = []

    #     assert period >= batch_size and period % batch_size == 0

    # Initialize parameters
    if params_init:
        print('Loading params...')
        params = params_init

        #         [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6, W7, b7] = params

        #         # random fc layers
        #         weight_scale = .01

        #         W5 = nd.random_normal(loc=0, scale=weight_scale, shape=(sl, num_fc1), ctx=ctx )
        #         W6 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc1, num_fc2), ctx=ctx )
        #         W7 = nd.random_normal(loc=0, scale=weight_scale, shape=(num_fc2, num_outputs), ctx=ctx )
        #         b5 = nd.random_normal(shape=num_fc1, scale=weight_scale, ctx=ctx)
        #         b6 = nd.random_normal(shape=num_fc2, scale=weight_scale, ctx=ctx)
        #         b7 = nd.random_normal(shape=num_outputs, scale=weight_scale, ctx=ctx)

        #         params = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5]
        #         print('Random the FC1&2-layers...')

        vs = []
        sqrs = []
        for param in params:
            param.attach_grad()
            vs.append(param.zeros_like())
            sqrs.append(param.zeros_like())
    else:
        params, vs, sqrs = init_params(num_fc1=128,
                                       num_fc2=64,
                                       num_outputs=2,
                                       sl=sl)
        print('Initializing weights randomly...')

    # Debug
    if Debug:
        print('Debugging...')
        if params_init:
            params = params_init
        else:
            params, vs, sqrs = init_params(num_fc1=128,
                                           num_fc2=64,
                                           num_outputs=2,
                                           sl=sl)
        for data, _ in train_data:
            data = data.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            break
        _, _ = net_PLB(data,
                       params,
                       debug=Debug,
                       pool_type=pool_type,
                       pool_size=pool_size,
                       pool_stride=pool_stride)
        print()


#     total_loss = [Total_loss(train_data_10, params, batch_size, num_outputs)]

    t = 0
    #   Epoch starts from 1.
    print('pool_type: ', pool_type)
    print('pool_size: ', pool_size)
    print('pool_stride: ', pool_stride)
    print('sl: ', sl)
    best_test_acc = 0
    best_params_epoch = 0

    for epoch in range(1, epochs + 1):
        Epoch_loss = []
        # Learning-rate self-decay.
        if epoch > 2:
            #             lr *= 0.1
            lr /= (1 + 0.01 * epoch)

        for batch_i, ((data, label),
                      (data_v,
                       label_v)) in enumerate(zip(train_data, test_data)):
            data = data.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            label = label.as_in_context(ctx)
            label_one_hot = nd.one_hot(label, num_outputs)
            with autograd.record():
                output, _ = net_PLB(data,
                                    params,
                                    pool_type=pool_type,
                                    pool_size=pool_size,
                                    pool_stride=pool_stride)
                loss = softmax_cross_entropy(output, label_one_hot)
            loss.backward()
            #             print(output)
            #             sgd(params, lr, batch_size)

            #           Increment t before invoking adam.
            t += 1
            adam(params, vs, sqrs, lr, batch_size, t)

            data_v = data_v.as_in_context(ctx).reshape((batch_size, 1, 1, -1))
            label_v = label_v.as_in_context(ctx)
            label_v_one_hot = nd.one_hot(label_v, num_outputs)
            output_v, _ = net_PLB(data_v,
                                  params,
                                  pool_type=pool_type,
                                  pool_size=pool_size,
                                  pool_stride=pool_stride)
            loss_v = softmax_cross_entropy(output_v, label_v_one_hot)

            #             #########################
            #              Keep a moving average of the losses
            #             #########################
            curr_loss = nd.mean(loss).asscalar()
            curr_loss_v = nd.mean(loss_v).asscalar()
            moving_loss = (curr_loss if
                           ((batch_i == 0) and (epoch - 1 == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           (smoothing_constant) * curr_loss)

            loss_history.append(curr_loss)
            loss_v_history.append(curr_loss_v)
            moving_loss_history.append(moving_loss)
            Epoch_loss.append(curr_loss)
            #             if batch_i * batch_size % period == 0:
            #                 print('Curr_loss: ', curr_loss)

            # print('Working on epoch %d. Curr_loss: %.5f (complete percent: %.2f/100' %(epoch, curr_loss*1.0, 1.0 * batch_i / (num_examples//batch_size) * 100) +')' , end='')
            # sys.stdout.write("\r")
            # print('{"metric": "Training Loss for ALL", "value": %.5f}' %(curr_loss*1.0) )
            # print('{"metric": "Testing Loss for ALL", "value": %.5f}' %(curr_loss_v*1.0) )
            print('{"metric": "Training Loss for SNR=%s", "value": %.5f}' %
                  (str(SNR), curr_loss * 1.0))
            print('{"metric": "Testing Loss for SNR=%s", "value": %.5f}' %
                  (str(SNR), curr_loss_v * 1.0))
        test_accuracy = evaluate_accuracy(test_data,
                                          num_examples,
                                          batch_size,
                                          params,
                                          net_PLB,
                                          pool_type=pool_type,
                                          pool_size=pool_size,
                                          pool_stride=pool_stride)
        train_accuracy = evaluate_accuracy(train_data,
                                           num_examples,
                                           batch_size,
                                           params,
                                           net_PLB,
                                           pool_type=pool_type,
                                           pool_size=pool_size,
                                           pool_stride=pool_stride)
        test_accuracy_history.append(test_accuracy)
        train_accuracy_history.append(train_accuracy)

        if test_accuracy >= best_test_acc:
            best_test_acc = test_accuracy
            best_params_epoch = epoch

        # print("Epoch %d, Moving_loss: %.6f, Epoch_loss(mean): %.6f, Train_acc %.4f, Test_acc %.4f" %
        # (epoch, moving_loss, np.mean(Epoch_loss), train_accuracy, test_accuracy))
        print('{"metric": "Train_acc. for SNR=%s in epoches", "value": %.4f}' %
              (str(SNR), train_accuracy))
        print('{"metric": "Test_acc. for SNR=%s in epoches", "value": %.4f}' %
              (str(SNR), test_accuracy))
        yield (params, loss_history, loss_v_history, moving_loss_history,
               test_accuracy_history, train_accuracy_history,
               best_params_epoch)
Example No. 19
def CNN(epoch = 100 , batch_size=256, save_period=10 , load_period=100 , weight_decay=0.001 ,learning_rate= 0.1 , dataset = "MNIST", ctx=mx.cpu(0)):

    #only for fullynetwork , 2d convolution
    def BN(X,gamma,beta,momentum=0.9,eps=1e-5,scope_name="",is_training=True):

        if len(X.shape)==2 :
            mean = nd.mean(X,axis=0)
            variance = nd.mean(nd.square(X-mean),axis=0)

            if is_training:
                Normalized_X=(X-mean)/nd.sqrt(variance+eps)
            elif is_training==False and not os.path.exists(path1) and epoch==0: #not param
                Normalized_X = (X - mean) / nd.sqrt(variance + eps)
            else:
                Normalized_X = (X - MOVING_MEANS[scope_name]) / nd.sqrt(MOVING_VARS[scope_name] + eps)

            out=gamma*Normalized_X+beta

        # Note: for a (2D) CNN we normalize over batch_size * height * width for each channel,
        # so gamma and beta have length equal to the channel count.
        # Reference: http://gluon.mxnet.io/chapter04_convolutional-neural-networks/cnn-batch-norm-scratch.html
        elif len(X.shape)==4:
            N , C , H , W = X.shape

            mean = nd.mean(X , axis=(0,2,3)) #normalize batch_size * height * width over each channel
            variance = nd.mean(nd.square(X-mean.reshape((1,C,1,1))),axis=(0,2,3))

            if is_training:
                Normalized_X = (X-mean.reshape((1,C,1,1)))/nd.sqrt(variance.reshape((1,C,1,1))+eps)
            elif is_training == False and not os.path.exists(path1) and epoch==0:  # load param , when epoch=0
                Normalized_X = (X-mean.reshape((1,C,1,1)))/nd.sqrt(variance.reshape((1,C,1,1))+eps)
            else:
                Normalized_X = (X - MOVING_MEANS[scope_name].reshape((1, C, 1, 1))) / nd.sqrt(MOVING_VARS[scope_name].reshape((1, C, 1, 1)) + eps)

            out=gamma.reshape((1,C,1,1))*Normalized_X+beta.reshape((1,C,1,1))

        if scope_name not in MOVING_MEANS and scope_name not in MOVING_VARS:
            MOVING_MEANS[scope_name] = mean
            MOVING_VARS[scope_name] = variance
        else:
            MOVING_MEANS[scope_name] = MOVING_MEANS[scope_name] * momentum + mean * (1.0 - momentum)
            MOVING_VARS[scope_name] = MOVING_VARS[scope_name] * momentum + variance * (1.0 - momentum)

        return out

    #data selection
    if dataset =="MNIST":
        train_data , test_data = MNIST(batch_size)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
    else:
        return "The dataset does not exist."

    # data structure
    if dataset == "MNIST" or dataset =="FashionMNIST":
        color = 1
    elif dataset == "CIFAR10":
        color = 3
    num_outputs = 10

    if dataset == "MNIST":
        path1 = "weights/MNIST_weights-{}".format(load_period)
        path2 = "weights/MNIST_weights_MEANS-{}".format(load_period)
        path3 = "weights/MNIST_weights_VARS-{}".format(load_period)
    elif dataset == "FashionMNIST":
        path1 = "weights/FashionMNIST_weights-{}".format(load_period)
        path2 = "weights/FashionMNIST_weights_MEANS-{}".format(load_period)
        path3 = "weights/FashionMNIST_weights_VARS-{}".format(load_period)
    elif dataset == "CIFAR10":
        path1 = "weights/CIFAR10_weights-{}".format(load_period)
        path2 = "weights/CIFAR10_weights_MEANS-{}".format(load_period)
        path3 = "weights/CIFAR10_weights_VARS-{}".format(load_period)

    if os.path.exists(path1):

        print("loading weights")
        [W1, B1, gamma1, beta1, W2, B2, gamma2, beta2, W3, B3, gamma3, beta3, W4, B4, gamma4, beta4, W5, B5]= nd.load(path1)  # weights load
        MOVING_MEANS = nd.load(path2)
        MOVING_VARS = nd.load(path3)

        for m,v in zip(MOVING_MEANS.values() , MOVING_VARS.values()):
            m.as_in_context(ctx)
            v.as_in_context(ctx)

        W1=W1.as_in_context(ctx)
        B1=B1.as_in_context(ctx)
        gamma1=gamma1.as_in_context(ctx)
        beta1=beta1.as_in_context(ctx)
        W2=W2.as_in_context(ctx)
        B2=B2.as_in_context(ctx)
        gamma2=gamma2.as_in_context(ctx)
        beta2=beta2.as_in_context(ctx)
        W3=W3.as_in_context(ctx)
        B3=B3.as_in_context(ctx)
        gamma3=gamma3.as_in_context(ctx)
        beta3=beta3.as_in_context(ctx)
        W4=W4.as_in_context(ctx)
        B4=B4.as_in_context(ctx)
        gamma4=gamma4.as_in_context(ctx)
        beta4=beta4.as_in_context(ctx)
        W5=W5.as_in_context(ctx)
        B5=B5.as_in_context(ctx)

        params = [W1 , B1 , gamma1 , beta1 , W2 , B2 , gamma2 , beta2 , W3 , B3 , gamma3 , beta3 , W4 , B4, gamma4 , beta4 , W5 , B5]

    else:

        print("initializing weights")
        weight_scale=0.1
        BN_weight_scale = 0.01

        MOVING_MEANS, MOVING_VARS = {}, {}

        with ctx:
            W1 = nd.random.normal(loc=0 , scale=weight_scale , shape=(60,color,3,3))
            B1 = nd.random.normal(loc=0 , scale=weight_scale , shape=60)

            gamma1 = nd.random.normal(shape=60, loc=1, scale=BN_weight_scale)
            beta1 = nd.random.normal(shape=60, scale=BN_weight_scale)

            W2 = nd.random.normal(loc=0 , scale=weight_scale , shape=(30,60,6,6))
            B2 = nd.random.normal(loc=0 , scale=weight_scale , shape=30)

            gamma2 = nd.random.normal(shape=30, loc=1, scale=BN_weight_scale)
            beta2 = nd.random.normal(shape=30, scale=BN_weight_scale)

            if dataset == "CIFAR10":
                reshape=750
            elif dataset == "MNIST" or dataset == "FashionMNIST":
                reshape=480

            W3 = nd.random.normal(loc=0 , scale=weight_scale , shape=(120, reshape))
            B3 = nd.random.normal(loc=0 , scale=weight_scale , shape=120)

            gamma3 = nd.random.normal(shape=120, loc=1, scale=BN_weight_scale)
            beta3 = nd.random.normal(shape=120, scale=BN_weight_scale)

            W4 = nd.random.normal(loc=0 , scale=weight_scale , shape=(64, 120))
            B4 = nd.random.normal(loc=0 , scale=weight_scale , shape=64)

            gamma4 = nd.random.normal(shape=64, loc=1, scale=BN_weight_scale)
            beta4 = nd.random.normal(shape=64, scale=BN_weight_scale)

            W5 = nd.random.normal(loc=0 , scale=weight_scale , shape=(num_outputs , 64))
            B5 = nd.random.normal(loc=0 , scale=weight_scale , shape=num_outputs)

        params = [W1 , B1 , gamma1 , beta1 , W2 , B2 , gamma2 , beta2 , W3 , B3 , gamma3 , beta3 , W4 , B4, gamma4 , beta4 , W5 , B5]

    # attach gradient!!!
    for i, param in enumerate(params):
        param.attach_grad()

    # network - similar to lenet5

    '''Convolution parameter
    data: (batch_size, channel, height, width)
    weight: (num_filter, channel, kernel[0], kernel[1])
    bias: (num_filter,)
    out: (batch_size, num_filter, out_height, out_width).
    '''

    def network(X, is_training=True, drop_rate=0.0): # formula : output_size=((input−weights+2*Padding)/Stride)+1
        #data size
        # MNIST,FashionMNIST = (batch size , 1 , 28 ,  28)
        # CIFAR = (batch size , 3 , 32 ,  32)

        C_H1=nd.Activation(data=BN(nd.Convolution(data=X , weight = W1 , bias = B1 , kernel=(3,3) , stride=(1,1) , num_filter=60), gamma1 , beta1 ,scope_name="BN1",is_training=is_training) , act_type="relu") # MNIST : result = ( batch size , 60 , 26 , 26) , CIFAR10 : : result = ( batch size , 60 , 30 , 30)
        P_H1=nd.Pooling(data = C_H1 , pool_type = "max" , kernel=(2,2), stride = (2,2)) # MNIST : result = (batch size , 60 , 13 , 13) , CIFAR10 : result = (batch size , 60 , 15 , 15)
        C_H2=nd.Activation(data=BN(nd.Convolution(data=P_H1 , weight = W2 , bias = B2 , kernel=(6,6) , stride=(1,1) , num_filter=30), gamma2 , beta2 ,scope_name="BN2",is_training=is_training), act_type="relu") # MNIST :  result = ( batch size , 30 , 8 , 8), CIFAR10 :  result = ( batch size , 30 , 10 , 10)
        P_H2=nd.Pooling(data = C_H2 , pool_type = "max" , kernel=(2,2), stride = (2,2)) # MNIST : result = (batch size , 30 , 4 , 4) , CIFAR10 : result = (batch size , 30 , 5 , 5)
        P_H2 = nd.flatten(data=P_H2)

        '''FullyConnected parameter
        • data: (batch_size, input_dim)
        • weight: (num_hidden, input_dim)
        • bias: (num_hidden,)
        • out: (batch_size, num_hidden)
        '''
        F_H1 =nd.Activation(BN(nd.FullyConnected(data=P_H2 , weight=W3 , bias=B3 , num_hidden=120), gamma3, beta3 ,scope_name="BN3",is_training=is_training),act_type="relu")
        F_H1 =nd.Dropout(data=F_H1, p=drop_rate)
        F_H2 =nd.Activation(BN(nd.FullyConnected(data=F_H1 , weight=W4 , bias=B4 , num_hidden=64), gamma4, beta4, scope_name="BN4",is_training=is_training),act_type="relu")
        F_H2 =nd.Dropout(data=F_H2, p=drop_rate)
        softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2 ,weight=W5 , bias=B5 , num_hidden=10))
        return softmax_Y

    def cross_entropy(output, label):
        return - nd.sum(label * nd.log(output), axis=1)

    #Adam optimizer
    state=[]
    optimizer=mx.optimizer.Adam(rescale_grad=1,learning_rate=learning_rate)
    for i,param in enumerate(params):
        state.append(optimizer.create_state(0,param))

    def SGD(params, lr , wd , bs):
        for param in params:
             param -= ((lr * param.grad)/bs+wd*param)

    for i in tqdm(range(1,epoch+1,1)):
        for data,label in train_data:
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            label = nd.one_hot(label , num_outputs)

            with autograd.record():
                output = network(data,is_training=True,drop_rate=0.0)

                #loss definition
                loss = cross_entropy(output,label) # (batch_size,)
                cost = nd.mean(loss).asscalar()
            loss.backward()

            for j,param in enumerate(params):
                optimizer.update(0,param,param.grad,state[j])

            #SGD(params, learning_rate , weight_decay , batch_size)

        print(" epoch : {} , last batch cost : {}".format(i,cost))

        #weight_save
        if i % save_period==0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset=="MNIST":
                nd.save("weights/MNIST_weights-{}".format(i), params)
                nd.save("weights/MNIST_weights_MEANS-{}".format(i), MOVING_MEANS)
                nd.save("weights/MNIST_weights_VARS-{}".format(i), MOVING_VARS)

            elif dataset=="CIFAR10":
                nd.save("weights/CIFAR10_weights-{}".format(i), params)
                nd.save("weights/CIFAR10_weights_MEANS-{}".format(i), MOVING_MEANS)
                nd.save("weights/CIFAR10_weights_VARS-{}".format(i), MOVING_VARS)

            elif dataset=="FashionMNIST":
                nd.save("weights/FashionMNIST_weights-{}".format(i),params)
                nd.save("weights/FashionMNIST_weights_MEANS-{}".format(i), MOVING_MEANS)
                nd.save("weights/FashionMNIST_weights_VARS-{}".format(i), MOVING_VARS)

    test_accuracy = evaluate_accuracy(test_data , network , ctx)
    print("Test_acc : {}".format(test_accuracy))

    return "optimization completed"
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        for i, _module in enumerate(self._arcface_modules):
          _fc7 = _module.get_outputs(merge_multi_context=True)[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0

        loss = nd.zeros(shape=(self._batch_size), ctx=self._ctx_cpu)
        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          
          #for debug
          loss -= (mx.nd.sum(mx.nd.log(fc7_outs[i]) * onehot_label, axis=1)).as_in_context(self._ctx_cpu)
          fc7_outs[i] -= onehot_label
          _module.backward(out_grads = [fc7_outs[i]])
          print('for debug, fc7 outs max is ', i, mx.nd.max(fc7_outs[i]))
          print('for debug, fc7 outs min is ', i, mx.nd.min(fc7_outs[i]))
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad
          print('for debug, global fc1_grad max is ', i, mx.nd.max(ctx_fc1_grad))
          print('for debug, ctx fc1 grad shape, ', ctx_fc1_grad.shape)

        global_fc1_grad = local_fc1_grad
        #  global_fc1_grad = mx.nd.clip(local_fc1_grad, a_min=-15, a_max=15)
        print('for debug, after clip global fc1_grad max is ', mx.nd.max(global_fc1_grad))
        self._curr_module.backward(out_grads = [global_fc1_grad])
        # for debug
        return mx.nd.sum(loss)
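The backward pass above writes the one-hot labels into a pre-allocated buffer via the out= argument; a minimal sketch of that call pattern, assuming only mxnet:

from mxnet import nd

labels = nd.array([1, 0, 2])
buf = nd.zeros((3, 4))                                             # reused buffer, shape (batch, classes)
nd.one_hot(labels, depth=4, on_value=1.0, off_value=0.0, out=buf)  # fills buf in place
print(buf)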
Example No. 21
def muitlclass_logistic_regression(epoch=100,
                                   batch_size=10,
                                   save_period=10,
                                   load_period=100,
                                   weight_decay=0.001,
                                   learning_rate=0.1,
                                   dataset="MNIST",
                                   ctx=mx.gpu(0)):

    #data selection
    if dataset == "MNIST":
        train_data, test_data = MNIST(batch_size)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
    else:
        return "The dataset does not exist."

    # data structure
    if dataset == "MNIST" or dataset == "FashionMNIST":
        num_inputs = 28 * 28
    elif dataset == "CIFAR10":
        num_inputs = 32 * 32
    num_outputs = 10

    if dataset == "MNIST":
        path = "weights/MNIST_weights-{}".format(load_period)
    elif dataset == "FashionMNIST":
        path = "weights/FashionMNIST_weights-{}".format(load_period)
    elif dataset == "CIFAR10":
        path = "weights/CIFAR10_weights-{}".format(load_period)

    if os.path.exists(path):
        print("loading weights")
        [W, B] = nd.load(path)  # weights load
        W = W.as_in_context(ctx)
        B = B.as_in_context(ctx)
        params = [W, B]
    else:
        print("initializing weights")
        with ctx:
            W = nd.random.normal(loc=0,
                                 scale=0.01,
                                 shape=(num_inputs, num_outputs))
            B = nd.random.normal(loc=0, scale=0.01, shape=num_outputs)
        params = [W, B]

    # attach gradient!!!
    for i, param in enumerate(params):
        param.attach_grad()

    def network(X):
        Y = nd.dot(X, W) + B
        softmax_Y = nd.softmax(Y)
        return softmax_Y

    def cross_entropy(output, label):
        return -nd.sum(label * nd.log(output), axis=1)

    #Adam optimizer
    state = []
    optimizer = mx.optimizer.Adam(rescale_grad=1, learning_rate=learning_rate)
    for i, param in enumerate(params):
        state.append(optimizer.create_state(0, param))

    def SGD(params, lr, wd, bs):
        for param in params:
            param -= ((lr * param.grad) / bs + wd * param)

    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:
            if dataset == "CIFAR10":
                data = nd.slice_axis(data=data, axis=3, begin=0, end=1)
            data = data.as_in_context(ctx).reshape((-1, num_inputs))
            label = label.as_in_context(ctx)
            label = nd.one_hot(label, num_outputs)

            with autograd.record():
                output = network(data)

                #loss definition
                loss = cross_entropy(output, label)  # (batch_size,)
                cost = nd.mean(loss).asscalar()

            loss.backward()

            for j, param in enumerate(params):
                optimizer.update(0, param, param.grad, state[j])

            #SGD(params, learning_rate , weight_decay , batch_size)

        print(" epoch : {} , last batch cost : {}".format(i, cost))

        #weight_save
        if i % save_period == 0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset == "MNIST":
                nd.save("weights/MNIST_weights-{}".format(i), params)

            elif dataset == "CIFAR10":
                nd.save("weights/CIFAR10_weights-{}".format(i), params)

            elif dataset == "FashionMNIST":
                nd.save("weights/FashionMNIST_weights-{}".format(i), params)

    test_accuracy = evaluate_accuracy(test_data, num_inputs, network, ctx,
                                      dataset)
    print("Test_acc : {}".format(test_accuracy))

    return "optimization completed"
Example No. 22
import numpy as np
 def clsmap2channel(self, x):
     y = ndarray.one_hot(x, 11)
     y = ndarray.transpose(y, (2, 0, 1))
     return y
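A quick shape check of the transform above (hypothetical 2x2 class map; assumes `from mxnet import ndarray`):

from mxnet import ndarray

x = ndarray.array([[0, 1], [10, 3]])      # (H, W) map of class ids, 11 classes in total
y = ndarray.one_hot(x, 11)                # (H, W, 11)
y = ndarray.transpose(y, (2, 0, 1))       # (11, H, W), channel-first
print(y.shape)                            # (11, 2, 2)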
Example No. 24
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        ## ============= forward classifier layer ===========
        fc7_outs = []
        for i, _module in enumerate(self._arcface_modules):
            _fc7 = _module.get_outputs(merge_multi_context=True)[0]
            fc7_outs.append(_fc7)

        ctx_max = map(
            lambda fc7_out: nd.max(fc7_out, axis=1, keepdims=True).
            as_in_context(self._ctx_single_gpu), fc7_outs)
        local_fc7_max = nd.max(nd.concat(*ctx_max, dim=1),
                               axis=1,
                               keepdims=True)
        fc7_exps = list(
            map(
                lambda fc7_out: nd.exp(fc7_out - local_fc7_max.as_in_context(
                    fc7_out.context)), fc7_outs))
        ctx_sum = map(
            lambda fc7_out: nd.sum(fc7_out, axis=1, keepdims=True).
            as_in_context(self._ctx_single_gpu), fc7_exps)
        exp_sum = nd.sum(nd.concat(*ctx_sum, dim=1), axis=1, keepdims=True)
        softmax_outs = list(
            map(
                lambda fc7_exp: nd.broadcast_div(
                    fc7_exp, exp_sum.as_in_context(fc7_exp.context)),
                fc7_exps))

        onehot_device_labels = [
            nd.one_hot((self.global_label).as_in_context(device) -
                       self._ctx_class_start[i],
                       depth=self._ctx_num_classes,
                       on_value=1.0,
                       off_value=0.0) for i, device in enumerate(self._context)
        ]

        ## ============= verbose train accuracy and loss ===========
        if self._iter % self._verbose == 0:
            local_label = self.global_label - self._local_class_start

            fc7_pred = self.parall_argmax(softmax_outs, self._ctx_single_gpu)
            _pred = nd.equal(fc7_pred, local_label).asnumpy()

            loss = self.parall_loss(softmax_outs, onehot_device_labels,
                                    self._ctx_single_gpu).asscalar()
            assert not math.isnan(loss)

            self.logger.info(
                '[Iter {}] train acc : {}, total loss : {}'.format(
                    self._iter, np.mean(_pred), loss))

        ## ============= backward large weight classifier layer with gradient ===========
        local_fc1_grad = self.get_ndarray_by_shape(
            self._ctx_single_gpu, 'local_fc1_grad',
            (self._batch_size, self._emb_size))
        local_fc1_grad[:, :] = 0.0
        for i, _module in enumerate(self._arcface_modules):
            _module.backward(
                out_grads=[softmax_outs[i] - onehot_device_labels[i]])
            ctx_fc1_grad = self.get_ndarray_by_v_arr(
                self._ctx_single_gpu, 'ctx_fc1_grad_%d' % i,
                _module.get_input_grads()[0])
            local_fc1_grad += ctx_fc1_grad

        ## ============= backward backbone ===============
        global_fc1_grad = local_fc1_grad
        self._backbone_module.backward(out_grads=[global_fc1_grad])
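The classifier-parallel softmax above subtracts a global row maximum and divides by a global sum across device shards; a CPU-only sketch of that arithmetic with two stand-in shards:

from mxnet import nd

fc7_a = nd.random.normal(shape=(4, 5))        # logits for classes 0..4 (shard 1)
fc7_b = nd.random.normal(shape=(4, 5))        # logits for classes 5..9 (shard 2)

row_max = nd.max(nd.concat(nd.max(fc7_a, axis=1, keepdims=True),
                           nd.max(fc7_b, axis=1, keepdims=True), dim=1),
                 axis=1, keepdims=True)        # global max per row
exp_a = nd.exp(nd.broadcast_sub(fc7_a, row_max))
exp_b = nd.exp(nd.broadcast_sub(fc7_b, row_max))
denom = nd.sum(exp_a, axis=1, keepdims=True) + nd.sum(exp_b, axis=1, keepdims=True)
prob_a = nd.broadcast_div(exp_a, denom)
prob_b = nd.broadcast_div(exp_b, denom)
print(nd.sum(prob_a, axis=1) + nd.sum(prob_b, axis=1))   # ~1.0 for every row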
Example No. 25
 def forward(self, x):
     x = nd.one_hot(x, self.vsize)
     self.h1 = gru(x, self.h1, *self.a1)
     return nd.dot(self.h1, self.Wy) + self.by
Example No. 26
 def forward(self, x):
     x = nd.one_hot(x, self.vsize)
     self.s1 = lstm(x, *self.s1, *self.a1)
     return nd.dot(self.s1[0], self.Wy) + self.by
Example No. 27
def CNN(epoch = 100 , batch_size=10, save_period=10 , load_period=100 , weight_decay=0.001 ,learning_rate= 0.1 , dataset = "MNIST", ctx=mx.cpu(0)):

    #data selection
    if dataset =="MNIST":
        train_data , test_data = MNIST(batch_size)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
    else:
        return "The dataset does not exist."


    # data structure
    if dataset == "MNIST" or dataset =="FashionMNIST":
        color = 1
    elif dataset == "CIFAR10":
        color = 3
    num_outputs = 10

    if dataset == "MNIST":
        path = "weights/MNIST_weights-{}".format(load_period)
    elif dataset == "FashionMNIST":
        path = "weights/FashionMNIST_weights-{}".format(load_period)
    elif dataset == "CIFAR10":
        path = "weights/CIFAR10_weights-{}".format(load_period)

    if os.path.exists(path):
        print("loading weights")
        [W1, B1, W2, B2, W3, B3, W4, B4, W5, B5] = nd.load(path)  # weights load

        W1=W1.as_in_context(ctx)
        B1=B1.as_in_context(ctx)
        W2=W2.as_in_context(ctx)
        B2=B2.as_in_context(ctx)
        W3=W3.as_in_context(ctx)
        B3=B3.as_in_context(ctx)
        W4=W4.as_in_context(ctx)
        B4=B4.as_in_context(ctx)
        W5=W5.as_in_context(ctx)
        B5=B5.as_in_context(ctx)

        params = [W1 , B1 , W2 , B2 , W3 , B3 , W4 , B4 , W5 , B5]
    else:
        print("initializing weights")
        with ctx:
            W1 = nd.random.normal(loc=0 , scale=0.1 , shape=(60,color,3,3))
            B1 = nd.random.normal(loc=0 , scale=0.1 , shape=60)

            W2 = nd.random.normal(loc=0 , scale=0.1 , shape=(30,60,6,6))
            B2 = nd.random.normal(loc=0 , scale=0.1 , shape=30)

            if dataset == "CIFAR10":
                reshape=750
            elif dataset == "MNIST" or dataset == "FashionMNIST":
                reshape=480

            W3 = nd.random.normal(loc=0 , scale=0.1 , shape=(120, reshape))
            B3 = nd.random.normal(loc=0 , scale=0.1 , shape=120)

            W4 = nd.random.normal(loc=0 , scale=0.1 , shape=(64, 120))
            B4 = nd.random.normal(loc=0 , scale=0.1 , shape=64)

            W5 = nd.random.normal(loc=0 , scale=0.1 , shape=(num_outputs , 64))
            B5 = nd.random.normal(loc=0 , scale=0.1 , shape=num_outputs)

        params = [W1 , B1 , W2 , B2 , W3 , B3 , W4 , B4, W5 , B5]
        
    # attach gradient!!!
    for i, param in enumerate(params):
        param.attach_grad()

    # network - similar to lenet5 

    '''Convolution parameter
    data: (batch_size, channel, height, width)
    weight: (num_filter, channel, kernel[0], kernel[1])
    bias: (num_filter,)
    out: (batch_size, num_filter, out_height, out_width).
    '''

    def network(X,drop_rate=0.0): # formula : output_size=((input−weights+2*Padding)/Stride)+1
        #data size 
        # MNIST,FashionMNIST = (batch size , 1 , 28 ,  28)
        # CIFAR = (batch size , 3 , 32 ,  32)

        C_H1=nd.Activation(data= nd.Convolution(data=X , weight = W1 , bias = B1 , kernel=(3,3) , stride=(1,1)  , num_filter=60) , act_type="relu") # MNIST : result = ( batch size , 60 , 26 , 26) , CIFAR10 : : result = ( batch size , 60 , 30 , 30) 
        P_H1=nd.Pooling(data = C_H1 , pool_type = "max" , kernel=(2,2), stride = (2,2)) # MNIST : result = (batch size , 60 , 13 , 13) , CIFAR10 : result = (batch size , 60 , 15 , 15)
        C_H2=nd.Activation(data= nd.Convolution(data=P_H1 , weight = W2 , bias = B2 , kernel=(6,6) , stride=(1,1) , num_filter=30), act_type="relu") # MNIST :  result = ( batch size , 30 , 8 , 8), CIFAR10 :  result = ( batch size , 30 , 10 , 10)
        P_H2=nd.Pooling(data = C_H2 , pool_type = "max" , kernel=(2,2), stride = (2,2)) # MNIST : result = (batch size , 30 , 4 , 4) , CIFAR10 : result = (batch size , 30 , 5 , 5)
        P_H2 = nd.flatten(data=P_H2)

        '''FullyConnected parameter
        • data: (batch_size, input_dim)
        • weight: (num_hidden, input_dim)
        • bias: (num_hidden,)
        • out: (batch_size, num_hidden)
        '''
        F_H1 =nd.Activation(nd.FullyConnected(data=P_H2 , weight=W3 , bias=B3 , num_hidden=120),act_type="sigmoid")
        F_H1 =nd.Dropout(data=F_H1, p=drop_rate)
        F_H2 =nd.Activation(nd.FullyConnected(data=F_H1 , weight=W4 , bias=B4 , num_hidden=64),act_type="sigmoid")
        F_H2 =nd.Dropout(data=F_H2, p=drop_rate)
        softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2 ,weight=W5 , bias=B5 , num_hidden=10))
        return softmax_Y

    def cross_entropy(output, label):
        return - nd.sum(label * nd.log(output), axis=1)

    #Adam optimizer
    state=[]
    optimizer=mx.optimizer.Adam(rescale_grad=1,learning_rate=learning_rate)
    for i,param in enumerate(params):
        state.append(optimizer.create_state(0,param))

    def SGD(params, lr , wd , bs):
        for param in params:
             param -= ((lr * param.grad)/bs+wd*param)

    for i in tqdm(range(1,epoch+1,1)):
        for data,label in train_data:
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            label = nd.one_hot(label , num_outputs)

            with autograd.record():
                output = network(data,drop_rate=0.2)

                #loss definition
                loss = cross_entropy(output,label) # (batch_size,)
                cost = nd.mean(loss).asscalar()

            loss.backward()
            for j,param in enumerate(params):
                optimizer.update(0,param,param.grad,state[j])

            #SGD(params, learning_rate , weight_decay , batch_size)

        print(" epoch : {} , last batch cost : {}".format(i,cost))

        #weight_save
        if i % save_period==0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset=="MNIST":
                nd.save("weights/MNIST_weights-{}".format(i),params)

            elif dataset=="CIFAR10":
                nd.save("weights/CIFAR10_weights-{}".format(i),params)

            elif dataset=="FashionMNIST":
                nd.save("weights/FashionMNIST_weights-{}".format(i),params)

    test_accuracy = evaluate_accuracy(test_data , network , ctx)
    print("Test_acc : {}".format(test_accuracy))

    return "optimization completed"
Example No. 28
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        arcface_module_outputs = []
        for i, _module in enumerate(self._arcface_modules):
          #_fc7 = _module.get_outputs(merge_multi_context=True)[0]
          out = _module.get_outputs(merge_multi_context=True)
          #print(out[0].shape)
          #print(out[1].shape)
          arcface_module_outputs.append(out)
          _fc7 = out[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0
        total_eloss = []
        celoss_verbose = 1000
        if self._iter%celoss_verbose==0:
          fc7_celoss = self.get_ndarray(tmp_ctx, 'test_fc7_celoss', (self._batch_size,))
          fc7_celoss[:] = 0.0

        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          #print(fc7_outs[i].shape, onehot_label.shape)

          if self._iter%celoss_verbose==0:
            _ce_loss = fc7_outs[i] * onehot_label
            _ce_loss = nd.sum(_ce_loss, axis=1)
            fc7_celoss += _ce_loss.as_in_context(tmp_ctx)
          fc7_outs[i] -= onehot_label

          out = arcface_module_outputs[i]
          out_grads = [fc7_outs[i]]
          for j in range(1, len(out)):
              eloss = out[j]
              #print('eloss%d:'%j, eloss.shape)
              #print(out_grads[0].shape)
              #egrad_shape = (out_grads[0].shape[0], eloss.shape[0])
              egrad_shape = eloss.shape
              egrad = self.get_ndarray(fc7_outs[i].context, 'egrad%d'%j, egrad_shape)
              #egrad[:][:] = 1.0/egrad_shape[0]
              egrad[:] = 1.0
              out_grads.append(egrad)
              if self._iter%self._verbose==0:
                  total_eloss.append(np.mean(eloss.asnumpy()))

          _module.backward(out_grads = out_grads)
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad

        if self._iter%self._verbose==0 and len(total_eloss)>0:
          print('{eloss}', self._iter, np.mean(total_eloss))
        #if self._iter%self._verbose==0:
        if self._iter%celoss_verbose==0:
          ce_loss = nd.log(fc7_celoss) * -1.0
          ce_loss = nd.mean(ce_loss)
          print('CELOSS,%d,%f' % (self._iter, ce_loss.asscalar()))

        global_fc1_grad = local_fc1_grad
        self._curr_module.backward(out_grads = [global_fc1_grad])
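
# A standalone sketch (not taken from the module above) of the arithmetic the
# sharded loop implements on a single device: the softmax cross-entropy gradient
# with respect to the logits is softmax(fc7) - onehot(label), computed with the
# usual max-subtraction trick for numerical stability.
import mxnet as mx
from mxnet import nd

def softmax_ce_grad(fc7, label, num_classes):
    # fc7: (batch, num_classes) logits; label: (batch,) integer class ids
    z = fc7 - nd.max(fc7, axis=1, keepdims=True)    # stabilise before exp
    prob = nd.exp(z) / nd.sum(nd.exp(z), axis=1, keepdims=True)
    onehot = nd.one_hot(label, depth=num_classes)
    return prob - onehot                             # gradient fed to backward()

# e.g. softmax_ce_grad(nd.random.normal(shape=(4, 8)), nd.array([1, 0, 3, 7]), 8)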
Exemplo n.º 29
0
def train():
    for epoch in range(num_epochs):
        btic = time.time()
        i = 0
        #import pdb
        #pdb.set_trace()

        for data, labels in test_data:
            real_label = nd.ones([labels.shape[0]], ctx=ctx)
            fake_label = nd.zeros([labels.shape[0]], ctx=ctx)
            labels = labels.as_in_context(ctx)
            x = data.as_in_context(ctx)

            y = nd.one_hot(labels, depth=10)
            #z = mx.nd.random_normal(0, 1, shape=(batch_size, latent_z_size, 1, 1), ctx=ctx)
            z = mx.nd.random_normal(0, 1, shape=(labels.shape[0], latent_z_size, 1, 1), ctx=ctx)

            #y_z = mx.nd.array(np.random.randint(0, 9, size=batch_size), ctx=ctx)
            # randint's upper bound is exclusive, so sample 0..9 inclusive to cover all 10 classes
            y_z = mx.nd.array(np.random.randint(0, 10, size=labels.shape[0]), ctx=ctx)
            y_z = nd.one_hot(y_z, depth=10)

            # Train Discriminator
            with autograd.record():
                output = netD(x, y)
                errD_real = loss(output, real_label)
                logging.info(
                    f"YuWang: shapes: x: {x.shape}, y:{y.shape}, out: {output.shape}, real_label: {real_label.shape}"
                )

                fake = netG(z, y_z)
                output = netD(fake.detach(), y_z)
                errD_fake = loss(output, fake_label)

                logging.info(
                    f"YuWang: shapes: out: {output.shape}, real_label: {real_label.shape}, fake_label: {fake_label.shape}, errD_real: {errD_real.shape}, errD_fake: {errD_fake.shape}"
                )
                errD = errD_real + errD_fake
                errD.backward()
            trainerD.step(data.shape[0])

            # Train Generator
            with autograd.record():
                fake = netG(z, y_z)
                output = netD(fake, y_z)
                errG = loss(output, real_label)
                errG.backward()
            trainerG.step(data.shape[0])

            if i % 50 == 0:
                logging.info(
                    f'speed: {batch_size / (time.time() - btic)} samples/s')
                logging.info(
                    f'discriminator loss = {nd.mean(errD).asscalar()}, generator loss = {nd.mean(errG).asscalar()} at iter {i} epoch {epoch}'
                )

            i = i + 1
            btic = time.time()
        if epoch % 5 == 0:
            netD.save_params("netD.params")
            netG.save_params("netG.params")
Exemplo n.º 30
0
def get_inputs(data):
    return [nd.one_hot(X, vocab_size) for X in data.T]
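
# A small usage sketch (the toy vocab_size below is an assumption; `from mxnet
# import nd` is assumed as in the surrounding examples): for index sequences of
# shape (batch_size, num_steps), iterating over data.T walks the time axis, so
# get_inputs returns num_steps arrays of shape (batch_size, vocab_size).
vocab_size = 5
data = nd.array([[0, 2, 4], [1, 3, 0]])    # (batch_size=2, num_steps=3)
inputs = get_inputs(data)                  # 3 arrays, each of shape (2, 5)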