Example #1
def distill_test(conf):
    model.eval()
    if conf['model_name'] in ['GCN', 'APPNP']:
        logits = model(G.ndata['feat'])
    elif conf['model_name'] in ['GAT', 'SGAT']:
        logits, G.edata['a'] = model(G.ndata['feat'])
    elif conf['model_name'] in ['GraphSAGE', 'SGC']:
        logits = model(G, G.ndata['feat'])
    elif conf['model_name'] == 'MoNet':
        us, vs = G.edges(order='eid')
        udeg, vdeg = 1 / torch.sqrt(G.in_degrees(us).float()), 1 / torch.sqrt(G.in_degrees(vs).float())
        pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1)
        logits = model(G.ndata['feat'], pseudo)
    elif conf['model_name'] == 'GCNII':
        logits = model(features, adj)
    else:
        raise ValueError(f"Undefined model: {conf['model_name']}")
    logp = F.log_softmax(logits, dim=1)
    loss_test = F.nll_loss(logp[idx_test], labels[idx_test])
    preds = torch.argmax(logp, dim=1).cpu().detach()
    teacher_preds = torch.argmax(cas[-1], dim=1).cpu().detach()
    acc_test = accuracy(logp[idx_test], labels[idx_test])
    acc_teacher_test = accuracy(cas[-1][idx_test], labels[idx_test])
    same_predict = np.count_nonzero(teacher_preds[idx_test] == preds[idx_test]) / len(idx_test)
    acc_dis = np.abs(acc_teacher_test.item() - acc_test.item())
    print("Test set results: loss= {:.4f} acc_test= {:.4f} acc_teacher_test= {:.4f} acc_dis={:.4f} same_predict= {:.4f}".format(
        loss_test.item(), acc_test.item(), acc_teacher_test.item(), acc_dis, same_predict))

    return acc_test, logp, same_predict
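
Most of the PyTorch examples in this collection call an `accuracy` helper that is not shown. A minimal sketch consistent with how it is used here (log-probabilities and integer labels in, a scalar tensor out, read via `.item()`) might look as follows; the actual helper in the original repositories may differ:

import torch

def accuracy(output, labels):
    # Fraction of rows whose argmax matches the label, as a 0-dim tensor.
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double().sum()
    return correct / len(labels)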
Example #2
def distill_test(conf, model, G, labels_init, labels, idx_test, cas):
    model.eval()
    if conf['model_name'] in ['GCN', 'APPNP', 'LogReg', 'MLP']:
        logits = model(G.ndata['feat'])
    elif conf['model_name'] == 'GAT':
        logits, G.edata['a'] = model(G.ndata['feat'])
    elif conf['model_name'] == 'GraphSAGE':
        logits = model(G, G.ndata['feat'])
    elif conf['model_name'] == 'PLP':
        logits, G.edata['a'], G.ndata['alpha'], G.ndata['el'], G.ndata['er'] = \
            model(G.ndata['feat'], labels_init)
    logp = F.log_softmax(logits, dim=1)
    loss_test = F.nll_loss(logp[idx_test], labels[idx_test])
    preds = torch.argmax(logp, dim=1).cpu().detach()
    teacher_preds = torch.argmax(cas[-1], dim=1).cpu().detach()
    acc_test = accuracy(logp[idx_test], labels[idx_test])
    acc_teacher_test = accuracy(cas[-1][idx_test], labels[idx_test])
    same_predict = np.count_nonzero(
        teacher_preds[idx_test] == preds[idx_test]) / len(idx_test)
    acc_dis = np.abs(acc_teacher_test.item() - acc_test.item())
    print(
        "Test set results: loss= {:.4f} acc_test= {:.4f} acc_teacher_test= {:.4f} acc_dis={:.4f} same_predict= {:.4f}"
        .format(loss_test.item(), acc_test.item(), acc_teacher_test.item(),
                acc_dis, same_predict))

    return acc_test, logp, same_predict
Example #3
def train(all_logits, dur, epoch):
    t0 = time.time()
    model.train()
    optimizer.zero_grad()
    if conf['model_name'] in ['GCN', 'APPNP']:
        logits = model(G.ndata['feat'])
    elif conf['model_name'] in ['GAT', 'SGAT']:
        logits, _ = model(G.ndata['feat'])
    elif conf['model_name'] in ['GraphSAGE', 'SGC']:
        logits = model(G, G.ndata['feat'])
    elif conf['model_name'] == 'MoNet':
        us, vs = G.edges(order='eid')
        udeg, vdeg = 1 / torch.sqrt(G.in_degrees(us).float()), 1 / torch.sqrt(
            G.in_degrees(vs).float())
        pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1)
        logits = model(G.ndata['feat'], pseudo)
    elif conf['model_name'] == 'GCNII':
        logits = model(features, adj)
    else:
        raise ValueError(f"Undefined model: {conf['model_name']}")
    logp = F.log_softmax(logits, dim=1)
    # we only compute loss for labeled nodes
    loss = F.nll_loss(logp[idx_train], labels[idx_train])
    acc_train = accuracy(logp[idx_train], labels[idx_train])
    loss.backward()
    optimizer.step()
    dur.append(time.time() - t0)
    model.eval()
    if conf['model_name'] in ['GCN', 'APPNP']:
        logits = model(G.ndata['feat'])
    elif conf['model_name'] in ['GAT', 'SGAT']:
        logits, _ = model(G.ndata['feat'])
    elif conf['model_name'] in ['GraphSAGE', 'SGC']:
        logits = model(G, G.ndata['feat'])
    elif conf['model_name'] == 'MoNet':
        us, vs = G.edges(order='eid')
        udeg, vdeg = 1 / torch.sqrt(G.in_degrees(us).float()), 1 / torch.sqrt(
            G.in_degrees(vs).float())
        pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1)
        logits = model(G.ndata['feat'], pseudo)
    elif conf['model_name'] == 'GCNII':
        logits = model(features, adj)
    else:
        raise ValueError(f"Undefined model: {conf['model_name']}")
    logp = F.log_softmax(logits, dim=1)
    # we save the logits for visualization later
    all_logits.append(logp.cpu().detach().numpy())
    loss_val = F.nll_loss(logp[idx_val], labels[idx_val])
    acc_val = accuracy(logp[idx_val], labels[idx_val])
    acc_test = accuracy(logp[idx_test], labels[idx_test])
    print(
        'Epoch %d | Loss: %.4f | loss_val: %.4f | acc_train: %.4f | acc_val: %.4f | acc_test: %.4f | Time(s) %.4f'
        % (epoch, loss.item(), loss_val.item(), acc_train.item(),
           acc_val.item(), acc_test.item(), dur[-1]))
    return acc_val, loss_val
Example #4
def distill_train(all_logits, dur, epoch):
    t0 = time.time()
    model.train()
    optimizer.zero_grad()
    if conf['model_name'] in ['GCN', 'APPNP']:
        logits = model(G.ndata['feat'])
    elif conf['model_name'] in ['GAT', 'SGAT']:
        logits, _ = model(G.ndata['feat'])
    elif conf['model_name'] in ['GraphSAGE', 'SGC']:
        logits = model(G, G.ndata['feat'])
    elif conf['model_name'] == 'MoNet':
        us, vs = G.edges(order='eid')
        udeg, vdeg = 1 / torch.sqrt(G.in_degrees(us).float()), 1 / torch.sqrt(G.in_degrees(vs).float())
        pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1)
        logits = model(G.ndata['feat'], pseudo)
    elif conf['model_name'] == 'GCNII':
        logits = model(features, adj)
    else:
        raise ValueError(f"Undefined model: {conf['model_name']}")
    # print(G.ndata['alpha'])
    logp = F.log_softmax(logits, dim=1)
    # we only compute loss for labeled nodes
    # loss = F.nll_loss(logp[idx_train], labels[idx_train])
    if args.asstype == 0:
        loss = F.nll_loss(logp[idx_train], labels[idx_train]) - 0.5 * F.kl_div(logp, cas[-1])
    else:
        loss = F.kl_div(logp, cas[-1], reduction='batchmean')
    acc_train = accuracy(logp[idx_train], labels[idx_train])
    loss.backward()
    optimizer.step()
    dur.append(time.time() - t0)
    model.eval()
    if conf['model_name'] in ['GCN', 'APPNP', 'LogReg', 'MLP']:
        logits = model(G.ndata['feat'])
    elif conf['model_name'] == 'GAT':
        logits = model(G.ndata['feat'])[0]
    elif conf['model_name'] == 'GraphSAGE':
        logits = model(G, G.ndata['feat'])
    elif conf['model_name'] == 'PLP':
        logits = model(G.ndata['feat'], labels_init)[0]
    logp = F.log_softmax(logits, dim=1)
    all_logits.append(logp.cpu().detach().numpy())
    # if conf['model_name'] == 'PLP':
    #     loss_val = my_loss(logp[idx_no_train], cas[-1][idx_no_train]) + F.nll_loss(logp[idx_val], labels[idx_val])
    # else:
    #     loss_val = my_loss(logp, cas[-1])
    loss_val = my_loss(logp[idx_val], cas[-1][idx_val])
    # loss_val = loss
    # loss_val = F.nll_loss(logp[idx_val], labels[idx_val])
    acc_val = accuracy(logp[idx_val], labels[idx_val])
    acc_test = accuracy(logp[idx_test], labels[idx_test])
    print('Epoch %d | Loss: %.4f | loss_val: %.4f | acc_train: %.4f | acc_val: %.4f | acc_test: %.4f | Time(s) %.4f' % (
        epoch, loss.item(), loss_val.item(), acc_train.item(), acc_val.item(), acc_test.item(), dur[-1]))
    return acc_val, loss_val
Example #5
    def f(w):
        """Weights predictions and returns an accuracy score.

        Args:
            w (np.array): Array of weights.

        Returns:
            1 - accuracy.

        """

        # Ensure the weights sum to one while avoiding division by zero
        w = w / max(w.sum(), c.EPSILON)

        # Gathering the maximum label identifier
        max_label = np.max(labels)

        # Creating an array to hold the weighted predictions
        w_preds = np.zeros((preds.shape[0], max_label + 1))

        # For every possible prediction
        for i in range(preds.shape[0]):
            # For every possible classifier
            for j in range(preds.shape[1]):
                # Sums the weighted classifier's prediction to its position in the final array
                w_preds[i][preds[i][j]] += w[j]

        # Gathers the most weighted prediction
        hat_preds = np.argmax(w_preds, axis=1)

        # Calculates the accuracy
        acc = m.accuracy(hat_preds, labels)

        return 1 - acc
Example #6
    def f(w):
        """Weights predictions and returns an accuracy score.

        Args:
            w (np.array): Array of weights.

        Returns:
            1 - accuracy.

        """

        # Round the weights to integers
        w = np.round(w)

        # Gathering the maximum label identifier
        max_label = np.max(labels)

        # Creating an array to hold the weighted predictions
        w_preds = np.zeros((preds.shape[0], max_label + 1))

        # For every possible prediction
        for i in range(preds.shape[0]):
            # For every possible classifier
            for j in range(preds.shape[1]):
                # Sums the boolean classifier's prediction to its position in the final array
                w_preds[i][preds[i][j]] += w[j]

        # Gathers the most weighted prediction
        hat_preds = np.argmax(w_preds, axis=1)

        # Calculates the accuracy
        acc = m.accuracy(hat_preds, labels)

        return 1 - acc
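
The double loop in Examples #5 through #7 that accumulates each classifier's weight into its predicted class also has a vectorized form. A sketch under the same assumptions (`preds` has shape `(n_samples, n_classifiers)` and holds integer class ids):

import numpy as np

def weighted_votes(preds, w, n_classes):
    # Accumulate w[j] into w_preds[i, preds[i, j]] for every sample i and
    # classifier j, without the explicit double loop.
    w_preds = np.zeros((preds.shape[0], n_classes))
    rows = np.repeat(np.arange(preds.shape[0]), preds.shape[1])
    np.add.at(w_preds, (rows, preds.ravel()), np.tile(w, preds.shape[0]))
    return w_preds

`np.add.at` is used instead of plain fancy-indexed assignment because it accumulates correctly when the same (row, class) index appears more than once.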
Example #7
def evaluate(weights, preds, labels):
    """Evaluates an ensemble based on optimized weights and classifiers' predictions.

    Args:
        weights (np.array): Array of weights (n_classifiers, 1).
        preds (np.array): Array of predictions of shape (n_samples, n_classifiers).
        labels (np.array): Array of ground truth labels of shape (n_samples, 1).

    Returns:
        An accuracy score.

    """

    # Gathering the maximum label identifier
    max_label = np.max(labels)

    # Creating an array to hold the weighted predictions
    w_preds = np.zeros((preds.shape[0], max_label + 1))

    # For every possible prediction
    for i in range(preds.shape[0]):
        # For every possible classifier
        for j in range(preds.shape[1]):
            # Sums the weighted classifier's prediction to its position in the final array
            w_preds[i][preds[i][j]] += weights[j]

    # Gathers the most weighted prediction
    hat_preds = np.argmax(w_preds, axis=1)

    # Calculates the accuracy
    acc = m.accuracy(hat_preds, labels)

    return acc
Example #8
def train_model(model,
                dataloader,
                loss_fn,
                optimizer,
                epoch,
                is_lstm,
                use_cuda=False,
                verbose=False):
    # set model to train mode
    model.train()
    top1 = AverageMeter()
    total_loss = 0

    # loop through data batches
    count = 0
    for batch_idx, (X, y) in enumerate(tqdm(dataloader)):
        batch_size = -1
        # Utilize GPU if enabled
        if use_cuda:
            if is_lstm:
                X['X'] = X['X'].cuda()
            else:
                X = X.cuda()
            y = y.cuda(non_blocking=True)  # `async` is a reserved word in Python 3.7+

        if is_lstm:
            batch_size = X['X'].size(0)
        else:
            batch_size = X.size(0)
        # Compute loss
        predictions = model(X)

        count += predictions.shape[0]
        loss = loss_fn(predictions, y)
        total_loss += loss.item()

        if verbose:
            logging.debug('mini-batch loss: {}'.format(loss))
            logging.debug('y: {}'.format(y))

        # Compute running accuracy
        acc1 = accuracy(predictions.data, y, (1, ))
        top1.update(acc1[0], batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if verbose:
            print('Progress [{0}/{1} ({2:.0f}%)]\tLoss:{3}'.format(
                count, len(dataloader.dataset),
                100. * batch_idx / len(dataloader), loss.item()))

    total_loss /= count
    train_acc = top1.avg
    logging.info(
        'Train Epoch: {} \tLoss: {:.6f} \t Training Acc: {:.2f}'.format(
            epoch, total_loss, train_acc))

    return total_loss, train_acc
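
Several examples here track running statistics with an `AverageMeter`. A typical implementation of that utility (a sketch; the variant used in Example #26 additionally takes a name in its constructor and exposes a `result` property) is:

class AverageMeter:
    # Tracks the last value, a running sum and count, and the running average.
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count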
Example #9
    def call(self, inputs: list, training: bool = True) -> \
            Tuple[tf.Tensor, tf.Tensor]:
        """
        Forward propagation
        :param inputs: (support, r_support, features, r_feature, label, idx_mask)
        :param training: whether in training mode
        """
        support, r_support, features, r_feature, label, idx_mask = inputs

        # forward propagation
        h_r = self.r_agg_layer((support, features))
        h_u, h_i = self.iu_agg_layer((support, features))
        p_e = self.r_gcn_layer((r_feature, r_support), training=True)
        concat_vecs = [h_r, h_u, h_i, p_e]
        gas_out = self.concat_layer((support, concat_vecs))

        # get masked data
        masked_data = tf.gather(gas_out, idx_mask)
        masked_label = tf.gather(label, idx_mask)

        # calculate loss and accuracy
        logits = tf.nn.softmax(tf.matmul(masked_data, self.u))
        loss = -tf.reduce_sum(tf.math.log(tf.nn.sigmoid(
            masked_label * logits)))
        acc = accuracy(logits, masked_label)

        return loss, acc
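
The TensorFlow examples (#9, #14, #30) call `accuracy` on softmax outputs and masked labels. A sketch consistent with those call sites, assuming one-hot labels (the actual helper is not shown):

import tensorflow as tf

def accuracy(logits, labels):
    # Fraction of rows where the argmax of the logits matches the argmax of
    # the (one-hot) labels.
    correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))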
Example #10
def nearest_neighbour(train_data, train_labels, test_data, test_labels):

    print(f'{NearestNeighbour.__name__}:')

    # Create and train model
    model = NearestNeighbour(5, dist=manhattan)
    model.train(train_data, train_labels)

    # Predict 2000 validation set samples and calculate accuracy
    test_data_2k = test_data[:len(test_labels)]
    test_pred = model.predict(test_data_2k)

    # Print metrics
    print('\nTest Accuracy: {:.02f}%\n'.format(
        100 * accuracy(test_pred, test_labels)))
    mat, classes = confusion_matrix(test_pred, test_labels)
    print('Precision:\n{}\n'.format(
        np.round(precision(test_pred, test_labels), 2)))
    print('Recall:\n{}\n'.format(np.round(recall(test_pred, test_labels), 2)))
    print('F1:\n{}\n'.format(np.round(f1_score(test_pred, test_labels), 2)))
    print('Confusion Matrix:')
    print(mat)

    # Predict 10000 test set samples and save predictions
    print('Predicting 10k samples...')
    test_pred = model.predict(test_data)
    save_predictions(nearest_neighbour.__name__, test_pred)
    print('Saved 10k predictions.\n')
Example #11
def linear_svm(train_data, train_labels, test_data, test_labels):
    print(f'{LinearSVM.__name__}:')

    # Create and train model
    lsvm_model = LinearSVM(alpha=0.01, features=180)
    model = OneVersusRest(lsvm_model)

    model.train(train_data, train_labels)

    # Predict 2000 validation set samples and calculate accuracy
    test_data_2k = test_data[:len(test_labels)]
    test_pred = model.predict(test_data_2k)

    # Print metrics
    print('\nTest Accuracy: {:.02f}%\n'.format(
        100 * accuracy(test_pred, test_labels)))
    mat, classes = confusion_matrix(test_pred, test_labels)
    print('Precision:\n{}\n'.format(
        np.round(precision(test_pred, test_labels), 2)))
    print('Recall:\n{}\n'.format(np.round(recall(test_pred, test_labels), 2)))
    print('F1:\n{}\n'.format(np.round(f1_score(test_pred, test_labels), 2)))
    print('Confusion Matrix:')
    print(mat)

    # Predict 10000 test set samples and save predictions
    print('Predicting 10k samples...')
    test_pred = model.predict(test_data)
    save_predictions(linear_svm.__name__, test_pred)
    print('Saved 10k predictions.\n')
Example #12
def evaluate(dataset, model, target_size, return_pred=False, loss_function=None):
    results = []

    eval_loader = data_reader.data_loader(dataset, config.BATCH_SIZE, shuffle=False)
    n_batches = int(np.ceil(len(dataset)/config.BATCH_SIZE))
    pbar = tqdm(range(n_batches))
    model.eval()
    total_loss = 0.
    for i in pbar:
        x, x_rc, md, y, x_len = next(iter(eval_loader))
        score = model(x, x_rc, md)
        loss = 0.
        if loss_function is not None:
            loss = loss_function(score, y)
            total_loss += loss.mean().item()
        probs = torch.nn.Softmax(dim=1)(score)
        results.append(probs.detach().cpu().numpy())
    model.train()

    results = np.vstack(results)

    if loss_function is not None:
        print('Val Loss: {:.5f}'.format(total_loss/n_batches))

    if return_pred:
        return results

    y_pred = np.argmax(results, axis=1)
    acc = accuracy(dataset['y'], y_pred)
    top10 = top10_accuracy_scorer(np.array(dataset['y']), results, target_size=target_size)
    f1 = f1_score(dataset['y'], y_pred)
    # print(acc, top10, f1)
    return acc, top10, f1
Example #13
def main_test(args):
    voc, char2id, id2char = get_vocabulary(voc_type=args.voc_type)

    input_images = tf.placeholder(dtype=tf.float32, shape=[1, args.height, None, 3], name="input_images")
    input_images_width = tf.placeholder(dtype=tf.float32, shape=[1], name="input_images_width")
    input_labels = tf.placeholder(dtype=tf.int32, shape=[1, args.max_len], name="input_labels")
    sar_model = SARModel(num_classes=len(voc),
                         encoder_dim=args.encoder_sdim,
                         encoder_layer=args.encoder_layers,
                         decoder_dim=args.decoder_sdim,
                         decoder_layer=args.decoder_layers,
                         decoder_embed_dim=args.decoder_edim,
                         seq_len=args.max_len,
                         is_training=False)

    model_infer, attention_weights, pred = sar_model(input_images, input_labels, input_images_width, batch_size=1, reuse=False)
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False, dtype=tf.int32)
    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())
    
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        ckpt_state = tf.train.get_checkpoint_state(args.checkpoints)
        model_path = os.path.join(args.checkpoints, os.path.basename(ckpt_state.model_checkpoint_path))
        print('Restore from {}'.format(model_path))
        saver.restore(sess, model_path)

        images_path, labels = get_data(args)
        predicts = []
        for img_path, label in zip(images_path, labels):
            try:
                img = cv2.imread(img_path)
            except Exception as e:
                print("{} error: {}".format(img_path, e))
                continue

            img, la, width = data_preprocess(img, label, char2id, args)

            pred_value, attention_weights_value = sess.run([pred, attention_weights], feed_dict={input_images: [img],
                                                                                                 input_labels: [la],
                                                                                                 input_images_width: [width]})
            pred_value_str = idx2label(pred_value, id2char, char2id)[0]
            print("predict: {} label: {}".format(pred_value_str, label))
            predicts.append(pred_value_str)
            pred_value_str += '$'
            if args.vis_dir:
                os.makedirs(args.vis_dir, exist_ok=True)
                assert len(img.shape) == 3
                att_maps = attention_weights_value.reshape([-1, attention_weights_value.shape[2], attention_weights_value.shape[3], 1]) # T * H * W * 1
                for i, att_map in enumerate(att_maps):
                    if i >= len(pred_value_str):
                        break
                    att_map = cv2.resize(att_map, (img.shape[1], img.shape[0]))
                    _att_map = np.zeros(dtype=np.uint8, shape=[img.shape[0], img.shape[1], 3])
                    _att_map[:, :, -1] = (att_map * 255).astype(np.uint8)

                    show_attention = cv2.addWeighted(img, 0.5, _att_map, 2, 0)
                    cv2.imwrite(os.path.join(args.vis_dir, os.path.basename(img_path).split('.')[0] + "_" + str(i) + "_" + pred_value_str[i] + ".jpg"), show_attention)

    acc_rate = accuracy(predicts, labels)
    print("Done, Accuracy: {}".format(acc_rate))
Example #14
    def call(self, inputs):
        """:param inputs include support, x, label, mask
        support means a list of the sparse adjacency Tensor
        x means feature
        label means label tensor
        mask means a list of mask tensors to obtain the train data
        """

        supports, x, label, idx_mask = inputs

        # forward propagation
        outputs = [self.input_layer((x, supports, self.h_0))]
        for layer in self.layers_:
            hidden = layer((x, supports, outputs[-1]))
            outputs.append(hidden)
        gem_out = outputs[-1]

        # get masked data
        masked_data = tf.gather(gem_out, idx_mask)
        masked_label = tf.gather(label, idx_mask)

        # Eq. (7) in paper
        logits = tf.nn.softmax(tf.matmul(masked_data, self.u))
        loss = -tf.reduce_sum(tf.math.log(tf.nn.sigmoid(
            masked_label * logits)))
        acc = accuracy(logits, masked_label)

        return loss, acc
Example #15
def logistic_regression(train_data, train_labels, test_data, test_labels):

    print(f'{LogisticRegression.__name__}:')

    # Create and train model
    lr_model = LogisticRegression(train_data.shape[1], eta=0.001, epochs=50)
    model = OneVersusRest(lr_model)

    model.train(train_data, train_labels)

    # Predict 2000 validation set samples and calculate accuracy
    test_data_2k = test_data[:len(test_labels)]
    test_pred = model.predict(test_data_2k)

    # Print metrics
    print('\nTest Accuracy: {:.02f}%\n'.format(
        100 * accuracy(test_pred, test_labels)))
    mat, classes = confusion_matrix(test_pred, test_labels)
    print('Precision:\n{}\n'.format(
        np.round(precision(test_pred, test_labels), 2)))
    print('Recall:\n{}\n'.format(np.round(recall(test_pred, test_labels), 2)))
    print('F1:\n{}\n'.format(np.round(f1_score(test_pred, test_labels), 2)))
    print('Confusion Matrix:')
    print(mat)

    # Predict 10000 test set samples and save predictions
    print('Predicting 10k samples...')
    test_pred = model.predict(test_data)
    save_predictions(logistic_regression.__name__, test_pred)
    print('Saved 10k predictions.\n')
Example #16
def calc_metric(pred, true):
    tmp_jac = jaccard(pred, true)
    tmp_ham = hamming(pred, true)
    tmp_prec = precision(pred, true)
    tmp_rec = recall(pred, true)
    tmp_f1 = f1_score(pred, true)
    tmp_acc = accuracy(pred, true)
    return tmp_jac, tmp_ham, tmp_prec, tmp_rec, tmp_f1, tmp_acc
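
Examples #16 and #18 treat predictions and ground truth as sets of types. Set-based sketches of two of these metrics, assuming both arguments are Python sets as in Example #18 (the real helpers are not shown):

def jaccard(pred, true):
    # Intersection over union of two sets; defined as 1.0 when both are empty.
    union = pred | true
    return len(pred & true) / len(union) if union else 1.0

def accuracy(pred, true):
    # Exact-match (subset) accuracy for a single multi-label sample.
    return 1.0 if pred == true else 0.0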
Example #17
def inference(sampler, dataloader, args):
    output_inf_file = open(args.preds_dir + '.inf', mode='w', encoding='utf8')

    greedy_stats = {}
    greedy_accuracy = 0

    for batch in dataloader:
        sampler.set_batch(batch)
        src = sampler.get_src_sentence(0)
        trg = sampler.get_trg_sentence(0)
        src_gender = sampler.get_src_gender(0)
        trg_gender = sampler.get_trg_gender(0)


        translated = sampler.greedy_decode(sentence=src, trg_gender=trg_gender)

        greedy_accuracy += accuracy(trg=trg, pred=translated)

        correct = 'CORRECT!' if trg == translated else 'INCORRECT!'

        if translated == trg:
            greedy_stats[(src_gender, trg_gender, 'correct')] = 1 + greedy_stats.get((src_gender, trg_gender, 'correct'), 0)
        else:
            greedy_stats[(src_gender, trg_gender, 'incorrect')] = 1 + greedy_stats.get((src_gender, trg_gender, 'incorrect'), 0)

        output_inf_file.write(translated)
        output_inf_file.write('\n')

        logger.info(f'src:\t\t\t{src}')
        logger.info(f'trg:\t\t\t{trg}')
        logger.info(f'greedy:\t\t\t{translated}')
        logger.info(f'src gender:\t\t{src_gender}')
        logger.info(f'trg gender:\t\t{trg_gender}')

        logger.info(f'res:\t\t\t{correct}')
        logger.info('\n\n')

    greedy_accuracy /= len(dataloader)

    output_inf_file.close()

    logger.info('*******STATS*******')
    total_examples = sum([greedy_stats[x] for x in greedy_stats])
    logger.info(f'TOTAL EXAMPLES: {total_examples}')
    logger.info('\n')

    correct_greedy = {(x[0], x[1]): greedy_stats[x] for x in greedy_stats if x[2] == 'correct'}
    incorrect_greedy = {(x[0], x[1]): greedy_stats[x] for x in greedy_stats if x[2] == 'incorrect'}
    total_correct_greedy = sum([v for k,v in correct_greedy.items()])
    total_incorrect_greedy = sum([v for k, v in incorrect_greedy.items()])

    logger.info('Results using greedy decoding:')
    for x in correct_greedy:
        logger.info(f'{x[0]}->{x[1]}')
        logger.info(f'\tCorrect: {correct_greedy.get(x, 0)}\tIncorrect: {incorrect_greedy.get(x, 0)}')
    logger.info(f'--------------------------------')
    logger.info(f'Total Correct: {total_correct_greedy}\tTotal Incorrect: {total_incorrect_greedy}')
    logger.info(f'Accuracy:\t{greedy_accuracy}')
Example #18
def card(corpus):
    global vectorizer
    jac_list = []
    ham_list = []
    prec_list = []
    rec_list = []
    f1_list = []
    acc_list = []
    id2term = {}
    for k, v in vectorizer.vocabulary_.items():
        id2term[v] = k
    for idx, item in enumerate(corpus):
        docstring = vectorizer.transform([item[-1]])[0]
        true_types = item[3]
        terms = [id2term[i] for i in docstring.indices]
        pred_types = []
        # check the hint terms of each basic type for a match;
        # terms matching the pattern `\w+(\_)?name|\w+(\_)?method`
        # are more likely to indicate a `str` type
        for basic_type in basic_types:
            h_terms = hint_terms[basic_type]
            if set(terms) & set(h_terms):
                pred_types.append('List' if basic_type ==
                                  'Tuple' else basic_type)
            elif basic_type == 'str':
                for term in terms:
                    if type_to_regexp[basic_type].match(term):
                        pred_types.append(basic_type)
                        break
        # if both `Dict` and `str` are in pred_types, remove `str`,
        # since `Dict` and `str` hardly ever occur together
        if 'Dict' in pred_types and 'str' in pred_types:
            pred_types.remove('str')
        # the type `Type` usually occurs on its own
        if 'Type' in pred_types and len(pred_types) > 1:
            pred_types.remove('Type')
        # if pred_types is empty, check whether any term's part of speech (POS)
        # is NNS or NNPS; if so, the type of this variable is likely `List`
        if not pred_types:
            pos_tags = [tag for w, tag in nltk.pos_tag(terms)]
            if set(['NNS', 'NNPS']) & set(pos_tags):
                pred_types = ['List']

        pred_types = set(pred_types)
        included_types = set(true_types) - set(['NoneType'])
        if 'Tuple' in included_types:
            included_types -= set(['Tuple'])
            included_types.add('List')

        # calculate Jaccard Coefficient
        jac_list.append(jaccard(pred_types, included_types))
        ham_list.append(hamming(pred_types, included_types))
        prec_list.append(precision(pred_types, included_types))
        rec_list.append(recall(pred_types, included_types))
        f1_list.append(f1_score(pred_types, included_types))
        acc_list.append(accuracy(pred_types, included_types))
    return jac_list, ham_list, prec_list, rec_list, f1_list, acc_list
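
The `str` branch above consults a `type_to_regexp` table that is not shown. A hypothetical reconstruction of its `str` entry, based on the pattern quoted in the comments:

import re

# Hypothetical entry: terms shaped like `file_name` or `class_method` are
# treated as hints for the `str` type.
type_to_regexp = {
    'str': re.compile(r'\w+(_)?name|\w+(_)?method'),
}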
Example #19
        def train():
            print("  Number of training samples %d" % len(train_ind))
            print("  Start training...\r\n")
            acc = 0
            for epoch in range(opt.num_iter):
                model.train()
                optimizer.zero_grad()

                with torch.set_grad_enabled(True):
                    node_logits, edge_weights = model(features_cuda,
                                                      edge_index,
                                                      edgenet_input)
                    loss = loss_fn(node_logits[train_ind], labels[train_ind])
                    loss.backward()
                    optimizer.step()
                correct_train, acc_train = accuracy(
                    node_logits[train_ind].detach().cpu().numpy(),
                    y[train_ind])

                model.eval()
                with torch.set_grad_enabled(False):
                    node_logits, _ = model(features_cuda, edge_index,
                                           edgenet_input)
                logits_test = node_logits[test_ind].detach().cpu().numpy()
                correct_test, acc_test = accuracy(logits_test, y[test_ind])
                auc_test = auc(logits_test, y[test_ind])
                prf_test = prf(logits_test, y[test_ind])

                print("Epoch: {},\tce loss: {:.5f},\ttrain acc: {:.5f}".format(
                    epoch, loss.item(), acc_train.item()))
                if acc_test > acc and epoch > 9:
                    acc = acc_test
                    correct = correct_test
                    aucs[fold] = auc_test
                    prfs[fold] = prf_test
                    if opt.ckpt_path != '':
                        if not os.path.exists(opt.ckpt_path):
                            #print("Checkpoint Directory does not exist! Making directory {}".format(opt.ckpt_path))
                            os.makedirs(opt.ckpt_path)
                        torch.save(model.state_dict(), fold_model_path)

            accs[fold] = acc
            corrects[fold] = correct
            print("\r\n => Fold {} test accuacry {:.5f}".format(fold, acc))
Example #20
def distill_train(all_logits, dur, epoch, model, optimizer, conf, G,
                  labels_init, labels, idx_no_train, idx_train, idx_val,
                  idx_test, cas):
    t0 = time.time()
    model.train()
    optimizer.zero_grad()
    if conf['model_name'] in ['GCN', 'APPNP', 'LogReg', 'MLP']:
        logits = model(G.ndata['feat'])
    elif conf['model_name'] == 'GAT':
        logits = model(G.ndata['feat'])[0]
    elif conf['model_name'] == 'GraphSAGE':
        logits = model(G, G.ndata['feat'])
    elif conf['model_name'] == 'PLP':
        logits = model(G.ndata['feat'], labels_init)[0]
    logp = F.log_softmax(logits, dim=1)
    # we only compute loss for labeled nodes
    if conf['model_name'] == 'PLP':
        loss = my_loss(logp[idx_no_train], cas[-1][idx_no_train])
    else:
        loss = F.kl_div(logp, cas[-1], reduction='batchmean')
    acc_train = accuracy(logp[idx_train], labels[idx_train])
    loss.backward()
    optimizer.step()
    dur.append(time.time() - t0)
    model.eval()
    if conf['model_name'] in ['GCN', 'APPNP', 'LogReg', 'MLP']:
        logits = model(G.ndata['feat'])
    elif conf['model_name'] == 'GAT':
        logits = model(G.ndata['feat'])[0]
    elif conf['model_name'] == 'GraphSAGE':
        logits = model(G, G.ndata['feat'])
    elif conf['model_name'] == 'PLP':
        logits = model(G.ndata['feat'], labels_init)[0]
    logp = F.log_softmax(logits, dim=1)
    all_logits.append(logp.cpu().detach().numpy())
    loss_val = my_loss(logp[idx_val], cas[-1][idx_val])
    acc_val = accuracy(logp[idx_val], labels[idx_val])
    acc_test = accuracy(logp[idx_test], labels[idx_test])
    print(
        'Epoch %d | Loss: %.4f | loss_val: %.4f | acc_train: %.4f | acc_val: %.4f | acc_test: %.4f | Time(s) %.4f'
        % (epoch, loss.item(), loss_val.item(), acc_train.item(),
           acc_val.item(), acc_test.item(), dur[-1]))
    return acc_val, loss_val
Example #21
def valid(net, optimizer, loss, valid_loader, save_imgs=False, fold_num=0):
    net.eval()
    # keep track of preds
    val_preds, val_labels = generate_preds(net, valid_loader, attn=True)

    epoch_vloss = loss(val_preds, val_labels)
    epoch_vf1 = macro_f1(val_preds.numpy() > 0., val_labels.numpy())
    epoch_vacc = accuracy(val_preds.numpy() > 0., val_labels.numpy())
    print(
        'Avg Eval Loss: {:.4}, Avg Eval Macro F1: {:.4}, Avg Eval Acc. {:.4}'.
        format(epoch_vloss, epoch_vf1, epoch_vacc))
    return epoch_vloss, epoch_vf1
Example #22
def validate_model(model,
                   dataloader,
                   loss_fn,
                   is_lstm,
                   predictions_saver=None,
                   use_cuda=False,
                   verbose=False):
    # set the model to evaluation mode
    model.eval()
    top1 = AverageMeter()

    count = 0
    for batch_idx, (X, y) in enumerate(tqdm(dataloader)):
        batch_size = -1
        # Utilize GPU if enabled
        if use_cuda:
            if is_lstm:
                X['X'] = X['X'].cuda()
            else:
                X = X.cuda()
            y = y.cuda(non_blocking=True)  # `async` is a reserved word in Python 3.7+

        if is_lstm:
            batch_size = X['X'].size(0)
        else:
            batch_size = X.size(0)
        # compute output
        predictions = model(X)
        count += predictions.shape[0]
        loss = loss_fn(predictions, y)

        if verbose:
            print('Valid/Test Progress [{0}/{1} ({2:.0f}%)]\tLoss:{3}'.format(
                count, len(dataloader.dataset),
                100. * batch_idx / len(dataloader), loss.item()))

        if is_lstm and predictions_saver:
            prediction_indices = torch.argmax(predictions, dim=1)
            for batch_index in range(batch_size):
                predictions_saver.update([
                    #X['video_dirs'][batch_index].strip('/').split('/')[-1], # id
                    'replace me',
                    y.cpu().numpy()[batch_index]
                    if use_cuda else y.numpy()[batch_index],  # label	
                    prediction_indices.cpu().numpy()[batch_index] if use_cuda
                    else prediction_indices.numpy()[batch_index]  # prediction
                ])

        # measure accuracy
        acc1 = accuracy(predictions.data, y, (1, ))
        top1.update(acc1[0], batch_size)

    return top1.avg
Example #23
def compare_between_models():
    tweets = load_anonymized_sentiment_tweets()
    print("Tweets:", tweets[:2])
    classifier = Classifier(unigrams=True,
                            bigrams=True,
                            classifier_type=Type.BOW,
                            tfidf=False)
    act, bow_pred = classifier.ten_fold_cross_validation(tweets)
    print("BOW")
    metrics.accuracy(act, bow_pred)

    classifier = Classifier(unigrams=True,
                            bigrams=True,
                            classifier_type=Type.NB,
                            tfidf=False)
    act, nb_pred = classifier.ten_fold_cross_validation(tweets)
    print("NB")
    metrics.accuracy(act, nb_pred)

    classifier = Classifier(unigrams=True,
                            bigrams=True,
                            classifier_type=Type.SVM,
                            tfidf=False)
    act, svm_pred = classifier.ten_fold_cross_validation(tweets)
    print("SVM")
    metrics.accuracy(act, svm_pred)

    print("BOW and NB")
    metrics.permutation_test(act, bow_pred, nb_pred)

    print("BOW and SVM")
    metrics.permutation_test(act, bow_pred, svm_pred)

    print("NB and SVM")
    metrics.permutation_test(act, nb_pred, svm_pred)
Example #24
        def evaluate():
            print("  Number of testing samples %d" % len(test_ind))
            print('  Start testing...')
            model.load_state_dict(torch.load(fold_model_path))
            model.eval()
            node_logits, _ = model(features_cuda, edge_index, edgenet_input)

            logits_test = node_logits[test_ind].detach().cpu().numpy()
            corrects[fold], accs[fold] = accuracy(logits_test, y[test_ind])
            aucs[fold] = auc(logits_test, y[test_ind])
            prfs[fold] = prf(logits_test, y[test_ind])

            print("  Fold {} test accuracy {:.5f}, AUC {:.5f}".format(
                fold, accs[fold], aucs[fold]))
Example #25
def run_epoch(model, loader, loss_fn, optimizer, desc_default='', epoch=0, writer=None, verbose=1, scheduler=None):
    if verbose:
        loader = tqdm(loader, disable=False)
        loader.set_description('[{} {}/{}]'.format(desc_default, epoch, C.get()['epoch']))

    metrics = Accumulator()
    cnt = 0
    total_steps = len(loader)
    steps = 0
    for data, label in loader:
        steps += 1
        data, label = data.cuda(), label.cuda()

        if optimizer:
            optimizer.zero_grad()

        preds = model(data)
        loss = loss_fn(preds, label)

        if optimizer:
            loss.backward()
            if C.get()['optimizer'].get('clip', 5) > 0:
                nn.utils.clip_grad_norm_(model.parameters(), C.get()['optimizer'].get('clip', 5))
            optimizer.step()

        top1, top5 = accuracy(preds, label, (1, 5))
        metrics.add_dict({
            'loss': loss.item() * len(data),
            'top1': top1.item() * len(data),
            'top5': top5.item() * len(data),
        })
        cnt += len(data)
        if verbose:
            postfix = metrics / cnt
            if optimizer:
                postfix['lr'] = optimizer.param_groups[0]['lr']
            loader.set_postfix(postfix)

        if scheduler is not None:
            scheduler.step(epoch - 1 + float(steps) / total_steps)

        del preds, loss, top1, top5, data, label

    metrics /= cnt
    if optimizer:
        metrics.metrics['lr'] = optimizer.param_groups[0]['lr']
    if verbose:
        for key, value in metrics.items():
            writer.add_scalar(key, value, epoch)
    return metrics
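
Examples #25 and #26 use a top-k variant of `accuracy`. A sketch consistent with the `(1, 5)` call sites, following the widely used ImageNet-style helper (the repositories' own versions may differ slightly):

import torch

def accuracy(output, target, topk=(1,)):
    # Precision@k, in percent, for each k in `topk`.
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res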
Example #26
def validate(cur_gpu, val_loader, model, criterion, epoch, hparams):
    logger = get_logger()
    model.eval()

    if logger:
        loss_meter = AverageMeter('val_loss')
        acc1_meter = AverageMeter('val_acc1')
        acc5_meter = AverageMeter('val_acc5')

    model_module = model.module if hparams.distributed_mode == 'gpus' else model

    for i, (image, target) in enumerate(val_loader):
        with torch.no_grad():
            if cur_gpu >= 0:
                image = image.cuda(cur_gpu, non_blocking=True)
                target = target.cuda(cur_gpu, non_blocking=True)

            if hparams.fp16:
                image = image.half()

            output = model(image)

            if hparams.fp16:
                output = output.float()

            loss = criterion(output,
                             target,
                             label_smoothing=hparams.label_smoothing)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            bs = torch.tensor(image.size(0),
                              device='cuda:%d' %
                              cur_gpu if cur_gpu >= 0 else None)

            if hparams.distributed_mode == 'gpus':
                bs, loss, acc1, acc5 = batch_reduce(bs, loss, acc1, acc5)

            if logger:
                loss_meter.update(loss.item(), bs.item())
                acc1_meter.update(acc1.item(), bs.item())
                acc5_meter.update(acc5.item(), bs.item())

    loss, acc1, acc5 = None, None, None
    if logger:
        metrics = [('val_loss', loss_meter.result),
                   ('val_acc1', acc1_meter.result),
                   ('val_acc5', acc5_meter.result)]
        logger.log_metrics(metrics, epoch + 1, 0, 'val')
        logger.log_summaries(model_module.get_summaries(), epoch + 1, 0, 'val')
        loss, acc1, acc5 = loss_meter.result, acc1_meter.result, acc5_meter.result
    return loss, acc1, acc5
Example #27
def validate_eval(val_loader, model, criterion, args, epoch=None, fnames=[]):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    model.eval()

    end = time.time()
    scores = np.zeros((len(val_loader.dataset), args.num_class))
    labels = np.zeros((len(val_loader.dataset), ))
    for i, (frames, objects, target) in enumerate(val_loader):
        with torch.no_grad():
            target = target.cuda(non_blocking=True)  # `async` is a reserved word in Python 3.7+
            frames = frames.cuda()
            objects = objects.cuda()
            output = model(frames, objects)

            loss = criterion(output, target)
            losses.update(loss.item(), target.size(0))
            prec1 = accuracy(output.data, target)
            top1.update(prec1[0], target.size(0))

            batch_time.update(time.time() - end)
            end = time.time()

            # Record scores.
            output_f = F.softmax(output, dim=1)  # To [0, 1]
            output_np = output_f.data.cpu().numpy()
            labels_np = target.data.cpu().numpy()
            b_ind = i * args.batch_size
            e_ind = b_ind + min(args.batch_size, output_np.shape[0])
            scores[b_ind:e_ind, :] = output_np
            labels[b_ind:e_ind] = labels_np

    print(
        'Test [Epoch {0}/{1}]:  '
        '*Time {2:.2f}mins ({batch_time.avg:.2f}s)  '
        '*Loss {loss.avg:.4f}  '
        '*Prec@1 {top1.avg:.3f}'.format(epoch,
                                        args.epoch,
                                        batch_time.sum / 60,
                                        batch_time=batch_time,
                                        top1=top1,
                                        loss=losses))

    model.train()
    res_scores = multi_scores(scores, labels,
                              ['precision', 'recall', 'average_precision'])
    return top1.avg, losses.avg, res_scores['precision'], res_scores[
        'recall'], res_scores['average_precision']
Example #28
    def evaluate(self, data, labels):
        """
        Args:
            data: vector of embeddings
            labels: ground truth
        Returns: Metrics on the result.
        """
        predictions = self.predict(data)
        return {
            "Accuracy": metrics.accuracy(labels, predictions),
            "Recall": metrics.recall(labels, predictions),
            "Precision": metrics.precision(labels, predictions),
            "F1": metrics.f1(labels, predictions),
            "Predictions": predictions
        }
Example #29
    def train_one_epoch(self, data_type="None"):
        """
        One epoch of training
        :return:
        """
        # Initialize tqdm
        if data_type == "STRONG":
            tqdm_batch = tqdm(self.data_loader.train_SE_loader, total=self.data_loader.train_SE_iterations,
                desc="Epoch-{}-".format(self.current_epoch))
            print( "Training with SEMEVAL data" )
        else:
            tqdm_batch = tqdm(self.data_loader.train_loader, total=self.data_loader.train_iterations,
                            desc="Epoch-{}-".format(self.current_epoch))
            print( "Training with All week data and SEMEVAL data" )
        self.model.train()

        # Initialize your average meters
        epoch_loss = AverageMeter()
        epoch_acc = AverageMeter()

        for itr, (x, y, _) in enumerate(tqdm_batch):
            if self.cuda:
                x, y = x.cuda(non_blocking=True), y.cuda(non_blocking=True)
            # model
            pred = self.model(x.permute(1, 0))
            # loss
            y = torch.max(y, 1)[1]
            y = y.long()
            cur_loss = self.loss(pred, y)
            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during training...')

            # optimizer
            self.optimizer.zero_grad()
            cur_loss.backward()
            self.optimizer.step()
            
            epoch_loss.update(cur_loss.item())
            batch_accuracy = accuracy(y, pred)
            epoch_acc.update(batch_accuracy, x.size(0))

            self.current_iteration += 1

        self.summary_writer.add_scalar("epoch-training/loss", epoch_loss.val, self.current_iteration)
        self.summary_writer.add_scalar("epoch_training/accuracy", epoch_acc.val, self.current_iteration)
        tqdm_batch.close()

        print( "Training Results at epoch-" + str(self.current_epoch) + " | " + "loss: " + str(epoch_loss.val) + " - acc-: " + str(epoch_acc.val ))
Example #30
    def call(self, inputs: list, training: bool = True) -> \
            Tuple[tf.Tensor, tf.Tensor]:
        """
        Forward propagation
        :param inputs: (adj_data, u_i, u_j, graph_label, label, idx_mask)
        :param training: whether in training mode
        """
        adj_data, u_i, u_j, graph_label, label, idx_mask = inputs

        # node level attention
        h1 = []
        for v in range(self.view_num):
            h = self.node_att_layer[v]([self.emb, adj_data[v]])
            h = tf.reshape(h, [self.nodes, self.emb.shape[1]])
            h1.append(h)
        h1 = tf.concat(h1, 0)
        h1 = tf.reshape(h1, [self.view_num, self.nodes, self.init_emb_size])

        # view level attention
        h2 = self.view_att_layer(h1)
        a_u = self.olp(h2)

        # get masked data
        masked_data = tf.gather(a_u, idx_mask)
        masked_label = tf.gather(label, idx_mask)

        # calculation loss and accuracy
        logits = tf.nn.softmax(tf.matmul(masked_data, self.theta))

        # Eq. (5)
        loss1 = -(1 / len(idx_mask)) * tf.reduce_sum(
            masked_label * tf.math.log(tf.nn.softmax(logits)))

        u_i_embedding = tf.nn.embedding_lookup(a_u, tf.cast(u_i,
                                                            dtype=tf.int32))
        u_j_embedding = tf.nn.embedding_lookup(a_u, tf.cast(u_j,
                                                            dtype=tf.int32))
        inner_product = tf.reduce_sum(u_i_embedding * u_j_embedding, axis=1)

        # Eq. (6)
        loss2 = -tf.reduce_mean(
            tf.math.log_sigmoid(graph_label * inner_product))

        # Eq. (7)
        loss = self.alpha * loss1 + (1 - self.alpha) * loss2
        acc = accuracy(logits, masked_label)

        return loss, acc