    def cross_validate(self, learner, x, labels):
        if not has_func(learner, "fit") or not has_func(learner, "predict"):
            raise ValueError("Learner doesn't have fit(x) or predict(x) functions implemented")
        train_agg = {"accuracy": 0.0, "precision": 0.0, "recall": 0.0, "f-1": 0.0}
        val_agg = {"accuracy": 0.0, "precision": 0.0, "recall": 0.0, "f-1": 0.0}
        train_scores, val_scores = [], []
        for fold in range(self.k):
            training, val = self._partition(x, fold)
            training_labels, val_labels = self._partition(labels, fold)
            learner.fit(training)
            training_predicted = learner.predict(training)
            val_predicted = learner.predict(val)

            # print("Training: {}\nVal: {}\nTLabels: {}\nVlabels: {}".format(training, val, training_labels, val_labels))
            # print("Training predicted", len(training_predicted), training_predicted)
            # print("Validation predicted", len(val_predicted), val_predicted)

            acc, (p, r, f1) = accuracy(training_labels, training_predicted), get_metrics(training_labels, training_predicted, class_label=1)
            scores = {"accuracy": acc, "precision": p, "recall": r, "f-1": f1}
            train_scores.append(scores)
            self._update_scores(scores, train_agg)

            acc, (p, r, f1) = accuracy(val_labels, val_predicted), get_metrics(val_labels, val_predicted, class_label=1)
            scores = {"accuracy": acc, "precision": p, "recall": r, "f-1": f1}
            val_scores.append(scores)
            self._update_scores(scores, val_agg)
        return self._aggregate_scores(train_agg, self.k), self._aggregate_scores(val_agg, self.k), train_scores, val_scores
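For reference, the `accuracy` and `get_metrics` helpers called above (and again in Examples #11 and #21) are not part of this listing. A minimal sketch consistent with how they are invoked; hypothetical, the originals may handle edge cases differently:

def accuracy(labels, predicted):
    # Fraction of predictions that match the true labels.
    return sum(1 for y, p in zip(labels, predicted) if y == p) / float(len(labels))

def get_metrics(labels, predicted, class_label=1):
    # Precision, recall and F-1 for the positive class `class_label`.
    tp = sum(1 for y, p in zip(labels, predicted) if p == class_label and y == class_label)
    fp = sum(1 for y, p in zip(labels, predicted) if p == class_label and y != class_label)
    fn = sum(1 for y, p in zip(labels, predicted) if p != class_label and y == class_label)
    precision = tp / float(tp + fp) if tp + fp else 0.0
    recall = tp / float(tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1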
Example #2
 def train_batch(self, images, labels, model, optimizer):
     model.zero_grad()
     optimizer.zero_grad()
     if isinstance(self.criterion, torch.nn.CrossEntropyLoss) or isinstance(
             self.criterion, models.CutMixCrossEntropyLoss):
         logits = model(images)
         loss = self.criterion(logits, labels)
     else:
         logits, loss = self.criterion(model, images, labels, optimizer)
     if isinstance(self.criterion, models.CutMixCrossEntropyLoss):
         _, labels = torch.max(labels.data, 1)
     loss.backward()
     grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                self.config.grad_clip)
     optimizer.step()
     if logits.shape[1] >= 5:
         acc, acc5 = util.accuracy(logits, labels, topk=(1, 5))
         acc, acc5 = acc.item(), acc5.item()
     else:
         acc, = util.accuracy(logits, labels, topk=(1, ))
         acc, acc5 = acc.item(), 1
     self.loss_meters.update(loss.item(), labels.shape[0])
     self.acc_meters.update(acc, labels.shape[0])
     self.acc5_meters.update(acc5, labels.shape[0])
     payload = {
         "acc": acc,
         "acc_avg": self.acc_meters.avg,
         "loss": loss,
         "loss_avg": self.loss_meters.avg,
         "lr": optimizer.param_groups[0]['lr'],
         "|gn|": grad_norm
     }
     return payload
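Most of the PyTorch snippets in this listing call an `accuracy(output, target, topk=...)` helper that is not shown. A sketch of the de-facto standard top-k helper from the torchvision ImageNet reference, which these call sites appear consistent with (variants differ in return shape, which is why some examples index `prec1[0]` and others `prec1[0][0]`):

def accuracy(output, target, topk=(1,)):
    # Computes top-k precision (as a percentage) for each k in `topk`.
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)   # (batch, maxk) indices
    pred = pred.t()                              # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res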
Example #3
def test_tree_classifier():
    """
    Function to test the decision tree classifier.

    :return: None
    """
    # X, Y = get_adult_data()
    # attr_types = [int for _ in range(X.shape[1])]

    data = load_breast_cancer()
    X = data.data
    Y = data.target.reshape(data.target.size)

    attr_types = [float for _ in range(X.shape[1])]

    Xtrain, Ytrain, Xtest, Ytest = split_data(X, Y, 0.8)
    model = ClassificationTree()
    print("Training..")
    model.train(Xtrain, Ytrain, attr_types)
    model.prune_tree(Xtrain, Ytrain)
    cY = model.predict(Xtest)
    print("Accuracy: {}".format(accuracy(Ytest, cY)))

    clf = tree.DecisionTreeClassifier()
    clf.fit(Xtrain, Ytrain.reshape(Ytrain.size))
    cY = clf.predict(Xtest)
    print("Scikit accuracy: {}".format(accuracy(Ytest, cY)))
Example #4
def main(FLAGS):
    if FLAGS.enable_client:
        print("Using client")
    else:
        print("Not using client")

    imagenet_inference_labels = get_imagenet_inference_labels()
    imagenet_training_labels = get_imagenet_training_labels()

    util.VAL_IMAGE_FLAGS = FLAGS

    assert sorted(imagenet_training_labels) == sorted(
        imagenet_inference_labels)

    validation_nums = get_validation_labels(FLAGS)
    validation_labels = imagenet_inference_labels[validation_nums]

    if FLAGS.enable_client:
        # Server input is dummy
        x_test = np.random.rand(FLAGS.batch_size, FLAGS.image_size,
                                FLAGS.image_size, 3)
    else:
        x_test = get_validation_images(FLAGS)

    config = server_config_from_flags(FLAGS, "input")

    sess = tf.compat.v1.Session(config=config)
    graph_def = load_model(FLAGS.model)

    tf.import_graph_def(graph_def, name="")

    input_tensor = sess.graph.get_tensor_by_name("input:0")
    output_tensor = sess.graph.get_tensor_by_name(
        "MobilenetV2/Logits/Conv2d_1c_1x1/BiasAdd:0")

    print("performing inference")
    start_time = time.time()
    y_pred = sess.run(output_tensor, {input_tensor: x_test})
    end_time = time.time()
    runtime = end_time - start_time
    per_image_runtime = runtime / float(FLAGS.batch_size)
    print("performed inference, runtime (s):", np.round(runtime, 2))
    print("runtime per image (s)", np.round(per_image_runtime, 2))
    y_pred = np.squeeze(y_pred)

    if FLAGS.batch_size == 1:
        top5 = y_pred.argsort()[-5:]
    else:
        top5 = np.flip(y_pred.argsort()[:, -5:], axis=1)

    if not FLAGS.enable_client:
        preds = imagenet_training_labels[top5]

        if FLAGS.batch_size < 10:
            print("validation_labels", validation_labels)
            print("validation_labels shape", validation_labels.shape)
            print("preds", preds)
            print("preds shape", preds.shape)

        util.accuracy(preds, validation_labels)
Example #5
def loss_distil(x, y, student, teacher, lossfunc):
    tau = 1
    with torch.no_grad():
        logits_t, logits_t_pure, feat_t, fmaps_t = teacher(x, y)
        # acc_t_pure = util.accuracy(logits_t_pure, y)
        # acc_t_raw = util.accuracy(logits_t, y)
        # print(acc_t_pure, acc_t_raw)
        # for aa in fmaps_t:
        # 	print(aa.shape)

    logits_s, logits_s_pure, feat_s, fmaps_s = student(x, y)
    # for bb in fmaps_s:
    # 	print(bb.shape)
    acc = util.accuracy(logits_s, y)
    acc_pure = util.accuracy(logits_s_pure, y)
    loss_nll = lossfunc(logits_s, y)
    loss_logits = F.softmax(logits_t_pure / tau, dim=-1) * (F.log_softmax(
        logits_t_pure / tau, dim=-1) - F.log_softmax(logits_s_pure, dim=-1))
    loss_logits = torch.mean(loss_logits, dim=0)  # average among batch
    loss_logits = torch.sum(loss_logits)  # sum among classes
    loss_fmap = sum(
        [torch.mean(torch.abs(s - t))
         for s, t in zip(fmaps_s, fmaps_t)]) / len(fmaps_t)
    loss_feat = torch.mean(torch.abs(feat_s - feat_t))
    loss_total = loss_nll + loss_feat + loss_logits + loss_fmap
    return acc, acc_pure, loss_total, loss_nll, loss_fmap, loss_feat, loss_logits
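The `loss_logits` term above is the KL divergence KL(p_teacher || p_student) with the teacher softened by `tau`. For reference, an equivalent formulation (same sum-over-classes, mean-over-batch reduction) using `F.kl_div`, assuming the same `logits_s_pure`, `logits_t_pure` and `tau` as in `loss_distil`:

loss_logits_alt = F.kl_div(
    F.log_softmax(logits_s_pure, dim=-1),    # student log-probabilities
    F.softmax(logits_t_pure / tau, dim=-1),  # softened teacher probabilities
    reduction="batchmean",                   # sum over classes, mean over batch
)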
Example #6
def test(test_loader, nets, criterions):
    cls1_losses = AverageMeter()
    dml1_losses = AverageMeter()
    cls2_losses = AverageMeter()
    dml2_losses = AverageMeter()
    top11 = AverageMeter()
    top51 = AverageMeter()
    top12 = AverageMeter()
    top52 = AverageMeter()

    net1 = nets['net1']
    net2 = nets['net2']

    criterionCls = criterions['criterionCls']
    criterionDML = criterions['criterionDML']

    net1.eval()
    net2.eval()

    end = time.time()
    for idx, (img, target) in enumerate(test_loader, start=1):
        if args.cuda:
            img = img.cuda()
            target = target.cuda()

        with torch.no_grad():
            _, _, _, _, output1 = net1(img)
            _, _, _, _, output2 = net2(img)

        # for net1
        cls1_loss = criterionCls(output1, target)
        dml1_loss = criterionDML(F.log_softmax(
            output1, dim=1), F.softmax(output2.detach(), dim=1)) / img.size(0)
        dml1_loss = dml1_loss * args.lambda_dml

        prec11, prec51 = accuracy(output1, target, topk=(1, 5))
        cls1_losses.update(cls1_loss.item(), img.size(0))
        dml1_losses.update(dml1_loss.item(), img.size(0))
        top11.update(prec11.item(), img.size(0))
        top51.update(prec51.item(), img.size(0))

        # for net2
        cls2_loss = criterionCls(output2, target)
        dml2_loss = criterionDML(F.log_softmax(
            output2, dim=1), F.softmax(output1.detach(), dim=1)) / img.size(0)
        dml2_loss = dml2_loss * args.lambda_dml

        prec12, prec52 = accuracy(output2, target, topk=(1, 5))
        cls2_losses.update(cls2_loss.item(), img.size(0))
        dml2_losses.update(dml2_loss.item(), img.size(0))
        top12.update(prec12.item(), img.size(0))
        top52.update(prec52.item(), img.size(0))

    f_l = [cls1_losses.avg, dml1_losses.avg, top11.avg, top51.avg]
    f_l += [cls2_losses.avg, dml2_losses.avg, top12.avg, top52.avg]
    print('Cls1: {:.4f}, DML1: {:.4f}, Prec@1_1: {:.2f}, Prec@5_1: {:.2f}, '
          'Cls2: {:.4f}, DML2: {:.4f}, Prec@1_2: {:.2f}, Prec@5_2: {:.2f}'.
          format(*f_l))
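`AverageMeter` is the usual running-average tracker from the PyTorch ImageNet reference code. A minimal sketch of the version these examples assume; some snippets (e.g. Example #24) construct it with extra name/format arguments, which this sketch omits:

class AverageMeter(object):
    """Tracks the most recent value and a running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count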
Example #7
def main(FLAGS):
    util.VAL_IMAGE_FLAGS = FLAGS

    imagenet_inference_labels = get_imagenet_inference_labels()
    imagenet_training_labels = get_imagenet_training_labels()
    assert (
        sorted(imagenet_training_labels) == sorted(imagenet_inference_labels))
    validation_nums = get_validation_labels(FLAGS)
    x_test = get_validation_images(FLAGS)
    validation_labels = imagenet_inference_labels[validation_nums]

    if FLAGS.batch_size < 10:
        print('validation_labels', validation_labels)

    (batch_size, width, height, channels) = x_test.shape
    print('batch_size', batch_size)
    print('width', width)
    print('height', height)
    print('channels', channels)

    x_test_flat = x_test.flatten(order='C')
    hostname = 'localhost'
    port = 34000

    if 'NGRAPH_COMPLEX_PACK' in os.environ:
        complex_packing = str2bool(os.environ['NGRAPH_COMPLEX_PACK'])
    else:
        complex_packing = False

    client = pyhe_client.HESealClient(FLAGS.hostname, port, batch_size,
                                      x_test_flat, complex_packing)

    while not client.is_done():
        time.sleep(1)
    results = client.get_results()

    imagenet_labels = get_imagenet_labels()
    results = np.array(results)

    if (FLAGS.batch_size == 1):
        top5 = results.argsort()[-5:]
    else:
        results = np.reshape(results, (FLAGS.batch_size, 1001))
        top5 = np.flip(results.argsort()[:, -5:], axis=1)

    preds = imagenet_labels[top5]
    print('validation_labels', validation_labels)
    print('top5', preds)

    util.accuracy(preds, validation_labels)
Example #8
def test(test_loader, net, criterion, testF):
	losses = AverageMeter()
	top1   = AverageMeter()
	top5   = AverageMeter()

	net.eval()

	end = time.time()
	for idx, (img, target) in enumerate(test_loader, start=1):
		if args.cuda:
			img = img.cuda()
			target = target.cuda()

		with torch.no_grad():
			_, _, _, _, output = net(img)
			loss = criterion(output, target)

		prec1, prec5 = accuracy(output, target, topk=(1,5))
		losses.update(loss.item(), img.size(0))
		top1.update(prec1.item(), img.size(0))
		top5.update(prec5.item(), img.size(0))

	f_l = [losses.avg, top1.avg, top5.avg]
	print('Loss: {:.4f}, Prec@1: {:.2f}, Prec@5: {:.2f}'.format(*f_l))

	testF.write('{},{},{}\n'.format(epoch, losses.avg, top1.avg))
	testF.flush()

	return top1.avg, top5.avg
Example #9
def train(model, train_iterator, criterion, optimizer, device):
    preds = []
    labels = []
    running_avg_loss = .0
    model.train()
    for i, batch in tqdm(enumerate(train_iterator)):
        x = batch.text.t().to(device)
        pred = model(x)
        y = batch.label.to(device)
        loss = criterion(pred[:, 1], y)

        running_avg_loss += (loss.item() - running_avg_loss) / (i + 1)
        preds.append(pred)
        labels.append(y)

        print(f'running average training loss: {running_avg_loss:.3f}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    preds = torch.cat(preds, dim=0)
    labels = torch.cat(labels, dim=0)
    acc = accuracy(preds, labels)
    print(f'training accuracy is: {acc:.3f} \n')
Example #10
def test(val_loader, model, criterion, cuda=0):
	batch_time = AverageMeter()
	losses = AverageMeter()
	top1 = AverageMeter()

	model.eval()
	end = time.time()
	for i, (input, target) in enumerate(val_loader):

		if cuda:
			input = input.cuda(non_blocking=True)
			target = target.cuda(non_blocking=True)

		output = model(input)
		output, target = optimize_loss_function(output, target)
		loss = criterion(output, target)

		prec1 = util.accuracy(output.data, target, topk = (1,))
		losses.update(loss.data, input.size(0))
		top1.update(prec1[0][0], input.size(0))

		batch_time.update(time.time() - end)
		end = time.time()

		if i != 0 and i % args.print_freq == 0:
			print('Test: [{0}/{1}]  Time {batch_time.val:.3f} ({batch_time.avg:.3f})  '
				  'Loss {loss.val:.4f} ({loss.avg:.4f})  Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
				   i, len(val_loader), batch_time=batch_time, loss=losses, top1=top1))
			gc.collect()

	print(' * Prec@1 {top1.avg:.3f}'.format(top1 = top1))
	return top1.avg
Example #11
def main():
    args = setup_argparser().parse_args()

    filename = args.file
    num_trees = args.num_trees
    sampling_ratio = args.sampling_ratio
    max_depth = args.max_depth
    min_size = args.min_size
    features_ratio = args.features_ratio

    x, labels = import_file(filename)
    rf = RandomForest(num_trees=num_trees,
                      sampling_ratio=sampling_ratio,
                      max_depth=max_depth,
                      min_size=min_size,
                      features_ratio=features_ratio)
    rf.fit(x)
    predictions = rf.predict(x)
    p, r, f1 = get_metrics(labels, predictions, class_label=1)
    acc = accuracy(labels, predictions)
    print("Naive results")
    print("Accuracy: {}, Precision: {}, Recall: {}, F-1: {}".format(
        acc, p, r, f1))

    ten_cv = CrossValidation(k=10)
    rf = RandomForest(num_trees=num_trees,
                      sampling_ratio=sampling_ratio,
                      max_depth=max_depth,
                      min_size=min_size,
                      features_ratio=features_ratio)
    train_scores, val_scores, *_ = ten_cv.cross_validate(rf, x, labels)
    print("10-fold cross validation")
    print("Training scores: {0}\nValidation scores: {1}".format(
        train_scores, val_scores))
    return
Example #12
def speech_tagging_test():
    st_time = time.time()
    data = Dataset("pos_tags.txt",
                   "pos_sentences.txt",
                   train_test_split=0.8,
                   seed=0)

    data.train_data = data.train_data[:100]

    data.test_data = data.test_data[:10]

    model = model_training(data.train_data, data.tags)

    tagging = sentence_tagging(data.test_data, model, data.tags)

    total_words = 0
    total_correct = 0
    for i in range(len(tagging)):
        correct, words, accur = accuracy(tagging[i], data.test_data[i].tags)
        total_words += words
        total_correct += correct
        print("accuracy: ", accur)

    print("Your total accuracy: ", total_correct * 1.0 / total_words)
    print("My total accuracy: ", 0.7761904761904762)

    en_time = time.time()
    print("sentence_tagging total time: ", en_time - st_time)
Example #13
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    # losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)

        prec1, prec5 = accuracy(output.data, target.data, topk=(1, 5))
        # losses.update(loss.data[0], data.size(0))
        top1.update(prec1[0], data.size(0))
        top5.update(prec5[0], data.size(0))

        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print(
                'Train Epoch: {} [{}/{} ({:.0f}%)], Loss: {:.6f}, Top1:{:.3f}, Top5:{:.3f}'
                .format(epoch, batch_idx * len(data),
                        len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss.item(),
                        top1.avg, top5.avg))
Example #14
def train(train_loader, model, classifier, criterion, optimizer, epoch, opt):
    """one epoch training"""
    model.eval()
    classifier.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for idx, (images, labels) in enumerate(train_loader):
        data_time.update(time.time() - end)

        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        bsz = labels.shape[0]

        # warm-up learning rate
        warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer)

        # compute loss
        with torch.no_grad():
            features = model.encoder(images)
        output = classifier(features.detach())
        loss = criterion(output, labels)

        # update metric
        losses.update(loss.item(), bsz)
        acc1, acc5 = accuracy(output, labels, topk=(1, 5))
        top1.update(acc1[0], bsz)
        top5.update(acc5[0], bsz)

        # SGD
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            logging.info('Train: [{0}][{1}/{2}]\t'
                         'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                         'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                             epoch,
                             idx + 1,
                             len(train_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             top1=top1))
            sys.stdout.flush()

    return losses.avg, top1.avg, top5.avg
Example #15
def train_kd_on_the_fly(args, model, teacher, device, optimizer, dataloader, epoch):
    # set model to training mode
    model.train()
    teacher.eval()
    top1 = AverageMeter()
    top5 = AverageMeter()

    for i, (train_batch, labels_batch) in enumerate(dataloader):
        # convert to torch Variables
        train_batch, labels_batch = Variable(train_batch), Variable(labels_batch)

        # compute model output, fetch teacher output, and compute KD loss
        output_batch = model(train_batch)

        # get one batch output from teacher_outputs list
        output_teacher_batch = teacher(train_batch)

        loss = loss_fn_kd(output_batch, labels_batch, output_teacher_batch, args)
        prec1, prec5 = accuracy(output_batch.data, labels_batch.data, topk=(1, 5))
        # losses.update(loss.data[0], data.size(0))
        top1.update(prec1[0], train_batch.size(0))
        top5.update(prec5[0], train_batch.size(0))

        # clear previous gradients, compute gradients of all variables wrt loss
        optimizer.zero_grad()
        loss.backward()

        # performs updates using calculated gradients
        optimizer.step()

        # Evaluate summaries only once in a while
        if i % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)], Loss: {:.6f}, Top1:{:.3f}, Top5:{:.3f}'.format(
                epoch, i * len(train_batch), len(dataloader.dataset),
                100. * i / len(dataloader), loss.item(), top1.avg, top5.avg))
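`loss_fn_kd` is not included in this listing. A hedged sketch of the Hinton-style knowledge-distillation loss it presumably implements; `args.temperature` and `args.alpha` are hypothetical attribute names:

import torch.nn.functional as F

def loss_fn_kd(outputs, labels, teacher_outputs, args):
    # Soft-target KL at temperature T (scaled by T*T so gradient magnitudes
    # stay comparable across temperatures) plus hard-label cross-entropy.
    T, alpha = args.temperature, args.alpha
    soft = F.kl_div(F.log_softmax(outputs / T, dim=1),
                    F.softmax(teacher_outputs / T, dim=1),
                    reduction="batchmean") * (alpha * T * T)
    hard = F.cross_entropy(outputs, labels) * (1.0 - alpha)
    return soft + hard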
Example #16
def test(val_loader, model, criterion, cuda, print_freq):
    batch_time = util.AverageMeter()
    losses = util.AverageMeter()
    top1 = util.AverageMeter()
    prfa = util.AverageMeterPRFA()

    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (input, target, seq_lengths) in enumerate(val_loader):

        if cuda:
            input = input.cuda()
            target = target.cuda()

        # compute output
        output = model(input, seq_lengths)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prfa_all = util.prf_multi_classify(output.data, target, topk=(1, ))
        prfa.update(prfa_all, seq_lengths.size(0))
        prec1 = util.accuracy(output.data, target, topk=(1, ))
        losses.update(loss.data, input.size(0))
        top1.update(prec1[0][0], input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        four_classify_loop_print(i, print_freq, val_loader, batch_time, losses,
                                 top1, prfa)

    four_classify_last_print(top1, prfa)
    return top1.avg
Example #17
def train(train_loader, model, criterion, optimizer, epoch):
    model.train()
    losses = AverageMeter()
    train_acc = AverageMeter()

    for (inputs, targets) in tqdm(train_loader):

        inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)
        optimizer.zero_grad()

        # forward
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # compute gradient and do SGD step
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        acc = accuracy(outputs.data, targets.data)
        losses.update(loss.item(), inputs.size(0))
        train_acc.update(acc.item(), inputs.size(0))

    return losses.avg, train_acc.avg
Example #18
def test(test_loader, nets, criterions):
	cls_losses    = AverageMeter()
	fitnet_losses = AverageMeter()
	top1          = AverageMeter()
	top5          = AverageMeter()

	snet = nets['snet']
	tnet = nets['tnet']

	criterionCls    = criterions['criterionCls']
	criterionFitnet = criterions['criterionFitnet']

	snet.eval()

	end = time.time()
	for idx, (img, target) in enumerate(test_loader, start=1):
		if args.cuda:
			img = img.cuda()
			target = target.cuda()

		with torch.no_grad():
			_, _, _, rb3_s, output_s = snet(img)
			_, _, _, rb3_t, output_t = tnet(img)

		cls_loss = criterionCls(output_s, target)
		fitnet_loss  = criterionFitnet(rb3_s, rb3_t.detach()) * args.lambda_fitnet

		prec1, prec5 = accuracy(output_s, target, topk=(1,5))
		cls_losses.update(cls_loss.item(), img.size(0))
		fitnet_losses.update(fitnet_loss.item(), img.size(0))
		top1.update(prec1.item(), img.size(0))
		top5.update(prec5.item(), img.size(0))

	f_l = [cls_losses.avg, fitnet_losses.avg, top1.avg, top5.avg]
	print('Cls: {:.4f}, Fitnet: {:.4f}, Prec@1: {:.2f}, Prec@5: {:.2f}'.format(*f_l))
Example #19
def train_with_distill(d_net, optimizer, device, train_loader, criterion):

    d_net.to(device)
    d_net.s_net.to(device)
    d_net.t_net.to(device)
    d_net.train()
    d_net.s_net.train()
    d_net.t_net.train()
    top1 = AverageMeter()

    for batch_idx, (inputs, targets) in enumerate(train_loader):

        inputs, targets = inputs.to(device), targets.to(device)

        batch_size = inputs.shape[0]
        outputs, loss_distill = d_net(inputs)
        loss_CE = criterion(outputs, targets)
        loss = loss_CE + 1e-4 * loss_distill.sum() / batch_size

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        outputs = outputs.float()
        prec1 = accuracy(outputs.data, targets)[0]
        top1.update(prec1.item(), inputs.size(0))

    return top1.avg
Example #20
def validate(val_loader, model, classifier, criterion, opt):
    """validation"""
    model.eval()
    classifier.eval()

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    with torch.no_grad():
        end = time.time()
        correct_0, correct_1, total_0, total_1 = 0, 0, 0, 0

        for idx, (images, labels) in enumerate(val_loader):
            images = images.float().cuda()
            labels = labels.cuda()
            bsz = labels.shape[0]

            # forward
            output = classifier(model.encoder(images))
            loss = criterion(output, labels)

            # update metric
            losses.update(loss.item(), bsz)
            acc1, acc5 = accuracy(output, labels, topk=(1, 1))

            #celeba
            _, pred = output.topk(1, 1, True, True)
            pred = pred.t()
            target = labels.view(1, -1)
            correct = pred.eq(target.expand_as(pred))
            correct_0 += correct[target == 0].sum()
            correct_1 += correct[target == 1].sum()
            total_0 += (target == 0).sum()
            total_1 += (target == 1).sum()

            top1.update(acc1[0], bsz)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if idx % opt.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          idx,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1))

    print(' * Acc@1 {top1.avg:.3f}'.format(top1=top1))
    if opt.dataset == 'celeba':
        print(
            f' * Correct class 0: {correct_0.float()/total_0.float():.4f} (total: {total_0})\n * Correct class 1: {correct_1.float()/total_1.float():.4f} (total: {total_1})'
        )

    return losses.avg, top1.avg
Example #21
def main():
    args = setup_argparser().parse_args()

    filename = args.file
    max_depth = args.max_depth
    min_size = args.min_size
    validate = args.validate

    x, labels = import_file(filename)
    dt = DecisionTree(max_depth=max_depth, min_size=min_size)
    tree = dt.fit(x)
    TreeNode.show_tree(tree)
    predicted_labels = dt.predict(x)
    p, r, f1 = get_metrics(labels, predicted_labels, class_label=1)
    acc = accuracy(labels, predicted_labels)
    print("Naive results")
    print("Accuracy: {}, Precision: {}, Recall: {}, F-1: {}".format(
        acc, p, r, f1))

    if validate:
        ten_cv = CrossValidation(k=10)
        dt = DecisionTree(max_depth=max_depth, min_size=min_size)
        train_scores, val_scores, *_ = ten_cv.cross_validate(dt, x, labels)
        print("10-fold cross validation")
        print("Training scores: {0}\nValidation scores: {1}".format(
            train_scores, val_scores))
    return
Example #22
def train_mnist(max_iter, learning_rate, f):
    resume = os.path.exists('./session/alexnet_mnist.meta')

    #Load data
    x_train_raw = loadData.load_train_images().reshape([-1, 784]) / 255
    y_train_raw = loadData.load_train_labels()

    batch_size = 10000
    data_length = x_train_raw.shape[0]
    num_batches = data_length // batch_size

    x_train = tf.placeholder(tf.float32, [None, 784])
    y_train = tf.placeholder(tf.int32, [None])
    y_onehot = tf.one_hot(y_train, 10)
    x = tf.reshape(x_train, [-1, 28, 28, 1])

    y = AlexNet.classifier(x)
    global_step = tf.Variable(0, trainable=False)
    lr = tf.placeholder(tf.float32)

    #cost function and accuracy
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_onehot, logits=y))
    train = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        loss, global_step=global_step)
    accuracy = util.accuracy(y, y_onehot)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=True)
    saver = tf.train.Saver()
    sess = tf.Session(config=config)

    if resume:
        saver.restore(sess, './session/alexnet_mnist')
    else:
        sess.run(tf.global_variables_initializer())

    # training
    step = 0
    while step < max_iter:
        split = 0
        for i in range(num_batches):
            x_b = x_train_raw[split:(split + batch_size)]
            y_b = y_train_raw[split:(split + batch_size)]
            split += batch_size
            _, l, acc, gs = sess.run([train, loss, accuracy, global_step],
                                     feed_dict={
                                         lr: learning_rate,
                                         x_train: x_b,
                                         y_train: y_b
                                     })
            step = (gs - 1) / 6
            if (step % f == 0):
                print('Iter: %d, Loss: %f Acc: %f' % (step, l, acc))

    # Save model for continuous learning
    saver.save(sess, './session/alexnet_mnist')

    sess.close()
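`util.accuracy(y, y_onehot)` above returns a scalar accuracy op to be evaluated in `sess.run`. A sketch consistent with that usage (hypothetical; assumes `tf` is TF1 as in the rest of the snippet):

def accuracy(logits, labels_onehot):
    # Mean agreement between argmax of the logits and argmax of the
    # one-hot labels.
    correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_onehot, 1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))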
Example #23
def plot_single(TEST, DATE, start_cycle, end_cycle):
    #PARAMETERS
    FONTSIZE = 18
    HOME = os.environ['HOME']

    supply_curr = np.load('plot/data/harvard_'+DATE+'_supply_curr_'  +TEST +'.npy')
    supply_volt = np.load('plot/data/harvard_'+DATE+'_supply_volt_'  +TEST +'.npy')
    action =      np.load('plot/data/harvard_'+DATE+'_action_'+TEST +'.npy')
    VE =          np.load('plot/data/harvard_'+DATE+'_VE_'+TEST +'.npy')

    #correct stat dump being not every cycle
    action = np.roll(action,-1)

    end_cycle = min(end_cycle,len(supply_curr))

    fig = plt.figure(constrained_layout=True)
    gs = fig.add_gridspec(4, 7)
    ax = fig.add_subplot(gs[0, :])
    # axb = fig.add_subplot(gs[1, :])
    # axd = fig.add_subplot(gs[2, :])
    axc = fig.add_subplot(gs[1, 0])

    fig.set_size_inches(40, 15)
    fig.suptitle('(Harvard, DESKTOP, ' + TEST + ')', fontsize=FONTSIZE)
    ax.set_ylabel('Supply Voltage', fontsize=FONTSIZE)
    ax2 = ax.twinx()
    ax2.set_ylabel('Current', color='tab:blue', fontsize=FONTSIZE)  # we already handled the x-label with ax1

    xvar = np.linspace(0,len(supply_volt),len(supply_volt))

    ax.set_title('Supply Voltage Over Time', fontsize=FONTSIZE)
    ax.plot(xvar, supply_volt,color='black', linewidth=1.0)
    ax.set_xlim(left = start_cycle, right = end_cycle)
    ax.set_ylim(bottom = min(i for i in supply_volt if i > 0.8), top = max(supply_volt))
    ax2.plot(xvar, supply_curr, color='tab:blue')
    ax2.tick_params(axis='y', labelcolor='tab:blue')
    ax2.set_ylim([min(i for i in supply_curr if i > 0.8), max(supply_curr)])

    for i in range(len(supply_volt)):
        if action[i]:
            ax.axvspan(i, i+1, color='red', alpha=0.15)
        if VE[i]:
            ax.axvspan(i, i+1, color='blue', alpha=0.3)

    print('NUM VEs: ', sum(VE))
    print('NUM actions: ', sum(action))

    xvar,hits,false_neg,false_pos_x,false_pos = util.accuracy(action,VE, LEAD_TIME_CAP=80)   
    axc.set_xlim([0,max(xvar[-1],false_pos_x[-1])])
    axc.plot(xvar, hits, color='black', linewidth=1.0, label='hits')
    axc.plot(xvar, false_neg, color='red', linewidth=1.0, label='false negatives')
    axc.plot(false_pos_x, false_pos, color='blue', linewidth=1.0, label='false positives')
    axc.legend()
    axc.set_title('Accuracy', fontsize=14)
    axc.set_xlabel('Lead Time', fontsize=14) 
    axc.set_ylabel('(%)', fontsize=14)

    plt.savefig(HOME+ '/passat/plot/' + DATE + '_Vs&Is_vs_time' + '_harvard_' + TEST +'.png', dpi=300)
    print(HOME+ '/passat/plot/' + DATE + '_Vs&Is_vs_time' + '_harvard_' + TEST +'.png')
Example #24
def train_finetune(train_loader, model, criterion, optimizer, epoch, args,
                   scaler):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    end = time.time()

    is_main = not args.multiprocessing_distributed or (
        args.multiprocessing_distributed
        and args.rank % torch.cuda.device_count() == 0)

    for i, (images, target) in enumerate(train_loader):
        optimizer.zero_grad()

        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
        if torch.cuda.is_available():
            target = target.cuda(args.gpu, non_blocking=True)

        with autocast():
            # compute output
            output = model(images)
            loss = criterion(output, target)

        scaler.scale(loss).backward()

        # zero out the gradients of pruned (zeroed) weights
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                weight_copy = m.weight.data.abs().clone()
                mask = weight_copy.gt(0).float().cuda()
                m.weight.grad.data.mul_(mask)

        scaler.step(optimizer)

        # Updates the scale for next iteration.
        scaler.update()

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
Example #25
def validate(val_loader, model, classifier, criterion, opt):
    """
    evaluation
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    classifier.eval()

    with torch.no_grad():
        end = time.time()
        for idx, (input, target) in enumerate(val_loader):

            input = input.float()
            if opt.gpu is not None:
                input = input.cuda(opt.gpu, non_blocking=True)
            target = target.cuda(opt.gpu, non_blocking=True)

            # compute output
            if not opt.view == 'temporal':
                feat_l, feat_ab = model(input, opt.layer)
                feat = torch.cat((feat_l.detach(), feat_ab.detach()), dim=1)
            else:
                feat = model(input, opt.layer)
            output = classifier(feat)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if idx % opt.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          idx,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

    return top1.avg, top5.avg, losses.avg
Example #26
def test12(test_loader, nets, epoch):
    clf_losses = [AverageMeter() for _ in range(5)]
    top1 = [AverageMeter() for _ in range(5)]
    consistency_meters = [AverageMeter() for _ in range(2)]

    snet = nets['snet']
    tnet = nets['tnet']
    snet.eval()
    tnet.eval()

    for idx, (img, target) in enumerate(test_loader, start=1):
        img = cpu_gpu(args.cuda, img, volatile=True)
        target = cpu_gpu(args.cuda, target, volatile=True)
        out1_t, out2_t = tnet(img)
        out1_s, out2_s = snet(img)
        cls_t1 = F.cross_entropy(out1_t, target)
        cls_t2 = F.cross_entropy(out2_t, target)
        cls_s1 = F.cross_entropy(out1_s, target)
        cls_s2 = F.cross_entropy(out2_s, target)

        preds1_t = [out1_t, out1_s]
        preds2_t = [out2_t, out2_s]

        if 'New' in args.pseudo_label_type:
            pseudo, pseudo_loss = get_pseudo_loss_new(preds1_t, preds2_t, consistency_meters, weight_clip=True)
        elif 'Old' in args.pseudo_label_type:
            pseudo, pseudo_loss = get_pseudo_loss(preds1_t, preds2_t, consistency_meters, softmax_pseudo_label=True)
        else:
            mean_t = (out1_t + out2_t) * 0.5
            mean_s = (out1_s + out2_s) * 0.5
            if using_soft_discrepancy:
                intra_t = soft_discrepancy(out1_t, out2_t) * intra_t_ratio
                intra_s = soft_discrepancy(out1_s, out2_s) * intra_s_ratio
            else:
                intra_t = discrepancy(out1_t, out2_t) * intra_t_ratio
                intra_s = discrepancy(out1_s, out2_s) * intra_s_ratio
            length = intra_t + intra_s
            wt = intra_t.data / length
            ws = intra_s.data / length
            pseudo = wt * mean_t.detach() + ws * mean_s.detach()

        pseudo_cls = F.cross_entropy(pseudo, target)

        out = [out1_t, out2_t, out1_s, out2_s, pseudo]
        accu = [accuracy(pred, target, topk=(1,))[0] for pred in out]
        for acc, top in zip(accu, top1):
            top.update(acc, img.size(0))
        cls_loss = [cls_t1, cls_t2, cls_s1, cls_s2, pseudo_cls]
        for loss, losses in zip(cls_loss, clf_losses):
            losses.update(loss.item(), img.size(0))

    result = 'Epoch:{}, cls-loss:({:.3f},{:.3f},{:.3f},{:.3f},{:.3f}), ' \
             'top1:({:.4f},{:.4f},{:.4f},{:.4f},{:.4f})'. \
        format(epoch, clf_losses[0].avg, clf_losses[1].avg, clf_losses[2].avg, clf_losses[3].avg, clf_losses[4].avg,
               top1[0].avg, top1[1].avg, top1[2].avg, top1[3].avg, top1[4].avg)
    print(result)
    return [top1[0].avg, top1[1].avg, top1[2].avg, top1[3].avg, top1[4].avg]
Example #27
def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(block_feat, block_adj, block_pool)
    loss_train = F.nll_loss(output[train_idx], y[train_idx])
    acc_train = util.accuracy(output[train_idx], y[train_idx])
    loss_train.backward()
    optimizer.step()

    loss_val = F.nll_loss(output[test_idx], y[test_idx])
    acc_val = util.accuracy(output[test_idx], y[test_idx])
    print('Epoch: {:04d}'.format(epoch + 1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))
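`util.accuracy` here takes log-probabilities and integer labels and returns a scalar tensor (note the `.item()` calls above). A sketch in the style of the common GCN reference implementation; an assumption, not necessarily this author's helper:

def accuracy(output, labels):
    # Fraction of nodes whose argmax prediction matches the label.
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double().sum()
    return correct / len(labels)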
Example #28
def test_keann_kg(val_loader,
                  model,
                  criterion,
                  cuda,
                  print_freq,
                  pdtb_category=''):
    batch_time = util.AverageMeter()
    losses = util.AverageMeter()
    top1 = util.AverageMeter()
    prfa = util.AverageMeterPRFA()
    if pdtb_category != '':
        prfa = util.AverageBinaryMeterPRFA()

    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (arg1, arg2, target, seq_lengths, transE_tensor_arg1,
            transE_tensor_arg2, batch_original) in enumerate(val_loader):

        arg1 = Variable(arg1, requires_grad=False)
        arg2 = Variable(arg2, requires_grad=False)
        target = Variable(target, requires_grad=False)

        if cuda:
            arg1 = arg1.cuda()
            arg2 = arg2.cuda()
            target = target.cuda()

        # compute output
        output = model((arg1, arg2), seq_lengths,
                       (transE_tensor_arg1, transE_tensor_arg2), cuda)
        # hot_image(model.encoder(arg1), model.encoder(arg2), batch_original, model.rand_matrix, model.kg_relation)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prfa_all = util.prf_multi_classify(output.data, target, topk=(1, ))
        prfa.update(prfa_all, seq_lengths.size(0))

        prec1 = util.accuracy(output.data, target, topk=(1, ))
        losses.update(loss.data.cpu().numpy(), seq_lengths.size(0))
        top1.update(prec1[0][0], seq_lengths.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if pdtb_category != '':
            binary_classify_loop_print(i, print_freq, val_loader, batch_time,
                                       losses, top1, prfa, pdtb_category)
        else:
            four_classify_loop_print(i, print_freq, val_loader, batch_time,
                                     losses, top1, prfa)

    if pdtb_category != '':
        binary_classify_last_print(top1, prfa)
    else:
        four_classify_last_print(top1, prfa)
    return top1.avg
Example #29
def validate(val_loader, model, classifier, criterion):
    """
    evaluation
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    classifier.eval()

    with torch.no_grad():
        end = time.time()
        for idx, (input, target) in enumerate(val_loader):

            input = input.float()
            input = input.cuda()
            target = target.cuda().long()

            im = input * 255
            im = im.float()
            im = im.cuda()
            feat = model({'rgb': im})
            feat = feat.mean(dim=2).mean(dim=2)

            output = classifier(feat)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if idx % 10 == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          idx,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

    return top1.avg, top5.avg, losses.avg
Example #30
def create_model(pos_tweets, neg_tweets, neu_tweets, classifier_param='LinearSVC'):

    # filter away words that are less than 3 letters to form the training training_data
    tweets = []
    for (words, sentiment) in pos_tweets + neg_tweets + neu_tweets:
        words = util.clean_text(words, True)
        words_filtered = [e.lower() for e in words.split() if len(e) >= 3]
        #words_filtered = [' '.join(w) for w in [ x for x in nltk.bigrams(words.split())]]
        tweets.append((words_filtered, sentiment))

    # make sure tweets are shuffled randomly
    shuffle(tweets)

    # get the training set and train the Classifier
    training_set = nltk.classify.util.apply_features(extract_features, tweets)

    max_specificity = -1
    best_classifier = None
    average_accuracy = 0.0

    # perform 10-fold cross validation
    cv = cross_validation.KFold(len(training_set), n_folds=10, shuffle=False, random_state=None)
    for traincv, testcv in cv:

        if classifier_param == "LinearSVC":
            classifier = SklearnClassifier(LinearSVC()).train(training_set[traincv[0]:traincv[len(traincv)-1]])
        elif classifier_param == "Tfid":
            # does TF-IDF weighting,
            # chooses the 1000 best features based on a chi2 statistic,
            # and then passes that into a multinomial naive Bayes classifier.
            pipeline = Pipeline([('tfidf', TfidfTransformer()), \
                                   ('chi2', SelectKBest(chi2, k=1000)), \
                                   ('nb', MultinomialNB())])
            classifier = SklearnClassifier(pipeline).train(training_set[traincv[0]:traincv[len(traincv)-1]])
        elif classifier_param == "Bernoulli":
            classifier = SklearnClassifier(BernoulliNB()).train(training_set[traincv[0]:traincv[len(traincv)-1]])
        elif classifier_param == "NaiveBayes":
            classifier = NaiveBayesClassifier.train(training_set[traincv[0]:traincv[len(traincv)-1]])
        else:
            print "Classifier option not available: ", classifier_param
            sys.exit(1)

        accuracy_of_classifier, specificity = \
            util.accuracy(classifier, tweets[testcv[0]:testcv[len(testcv)-1]])

        average_accuracy += accuracy_of_classifier
        if specificity > max_specificity:
            max_specificity = specificity
            best_classifier = classifier

    print "\naverage accuracy: ", average_accuracy/cv.n_folds

    # save the classifier
    joblib.dump(best_classifier, "model/%s_classifier.pkl" % classifier_param)

    print "saved classifier"
Example #31
def train(train_loader, nets, optimizer, criterions, epoch):
	batch_time = AverageMeter()
	data_time  = AverageMeter()
	cls_losses = AverageMeter()
	st_losses  = AverageMeter()
	top1       = AverageMeter()
	top5       = AverageMeter()

	snet = nets['snet']
	tnet = nets['tnet']

	criterionCls = criterions['criterionCls']
	criterionST  = criterions['criterionST']

	snet.train()

	end = time.time()
	for idx, (img, target) in enumerate(train_loader, start=1):
		data_time.update(time.time() - end)

		if args.cuda:
			img = img.cuda()
			target = target.cuda()

		_, _, _, _, output_s = snet(img)
		_, _, _, _, output_t = tnet(img)

		cls_loss = criterionCls(output_s, target)
		st_loss  = criterionST(F.log_softmax(output_s/args.T, dim=1),
							   F.softmax(output_t/args.T, dim=1)) * (args.T*args.T) / img.size(0)
		st_loss  = st_loss * args.lambda_st
		loss = cls_loss + st_loss

		prec1, prec5 = accuracy(output_s, target, topk=(1,5))
		cls_losses.update(cls_loss.item(), img.size(0))
		st_losses.update(st_loss.item(), img.size(0))
		top1.update(prec1.item(), img.size(0))
		top5.update(prec5.item(), img.size(0))

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

		batch_time.update(time.time() - end)
		end = time.time()

		if idx % args.print_freq == 0:
			print('Epoch[{0}]:[{1:03}/{2:03}] '
				  'Time:{batch_time.val:.4f} '
				  'Data:{data_time.val:.4f}  '
				  'Cls:{cls_losses.val:.4f}({cls_losses.avg:.4f})  '
				  'ST:{st_losses.val:.4f}({st_losses.avg:.4f})  '
				  'prec@1:{top1.val:.2f}({top1.avg:.2f})  '
				  'prec@5:{top5.val:.2f}({top5.avg:.2f})'.format(
				  epoch, idx, len(train_loader), batch_time=batch_time, data_time=data_time,
				  cls_losses=cls_losses, st_losses=st_losses, top1=top1, top5=top5))
Example #32
def stats_from_dev_set(stats):
    actuals = []
    predicteds  = []
    for (s1, s2), y in zip(dev_x, dev_y):
        pred_y, cost = test_fn(s1, s2, [y])
        actuals.append(y)
        predicteds.append(pred_y)
        stats.record_dev_cost(cost)
    dev_c = confusion_matrix(actuals, predicteds)
    dev_accuracy = util.accuracy(dev_c)
    stats.set_dev_accuracy(dev_accuracy)
    print "dev confusion\n %s (%s)" % (dev_c, dev_accuracy)
Example #33
def stats_from_dev_set(stats):
    predicteds, actuals = [], []
    for eg, true_labels in zip(dev_x, dev_y):
        b_cost, b_predicted = sess.run([cost, predicted],
                                       feed_dict=eg_and_label_to_feeddict(eg, true_labels))
        stats.record_dev_cost(b_cost)
        for p, a in zip(b_predicted, true_labels):
            predicteds.append(np.argmax(p))
            actuals.append(np.argmax(a))
    dev_c = confusion_matrix(actuals, predicteds)
    dev_accuracy = util.accuracy(dev_c)
    stats.set_dev_accuracy(dev_accuracy)
    print "dev confusion\n %s (%s)" % (dev_c, dev_accuracy)
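Examples #32, #33 and #39 pass a confusion matrix to `util.accuracy`. A minimal sketch consistent with that usage (hypothetical):

import numpy as np

def accuracy(confusion):
    # Overall accuracy: the fraction of the confusion-matrix mass that
    # lies on the diagonal.
    return np.trace(confusion) / float(np.sum(confusion))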
Example #34
def rte_classifier(trainer, features=rte_features):
    """
    Classify RTEPairs
    """
    train = [(pair, pair.value) for pair in nltk.corpus.rte.pairs(['rte1_dev.xml', 'rte2_dev.xml', 'rte3_dev.xml'])]
    test = [(pair, pair.value) for pair in nltk.corpus.rte.pairs(['rte1_test.xml', 'rte2_test.xml', 'rte3_test.xml'])]

    # Train up a classifier.
    print 'Training classifier...'
    classifier = trainer( [(features(pair), label) for (pair,label) in train] )

    # Run the classifier on the test data.
    print 'Testing classifier...'
    acc = accuracy(classifier, [(features(pair), label) for (pair,label) in test])
    print 'Accuracy: %6.4f' % acc

    # Return the classifier
    return classifier
Example #35
  def build_graph(self, data_paths, batch_size, is_training):
    """Builds generic graph for training or eval."""
    tensors = GraphReferences()

    _, tensors.examples = util.read_examples(
        data_paths,
        batch_size,
        shuffle=is_training,
        num_epochs=None if is_training else 2)

    parsed = parse_examples(tensors.examples)

    # Build a Graph that computes predictions from the inference model.
    logits = inference(parsed['images'], self.hidden1, self.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss_value = loss(logits, parsed['labels'])

    # Add to the Graph the Ops that calculate and apply gradients.
    if is_training:
      tensors.train, tensors.global_step = training(loss_value,
                                                    self.learning_rate)
    else:
      tensors.global_step = tf.Variable(0, name='global_step', trainable=False)

    # Add means across all batches.
    loss_updates, loss_op = util.loss(loss_value)
    accuracy_updates, accuracy_op = util.accuracy(logits, parsed['labels'])

    if not is_training:
      # Remove this if once Tensorflow 0.12 is standard.
      try:
        tf.contrib.deprecated.scalar_summary('accuracy', accuracy_op)
        tf.contrib.deprecated.scalar_summary('loss', loss_op)
      except AttributeError:
        tf.scalar_summary('accuracy', accuracy_op)
        tf.scalar_summary('loss', loss_op)

    tensors.metric_updates = loss_updates + accuracy_updates
    tensors.metric_values = [loss_op, accuracy_op]
    return tensors
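In Examples #35 and #38, `util.accuracy` returns an `(update_ops, value_op)` pair that is folded into `tensors.metric_updates` and `tensors.metric_values`. A sketch of a TF1 streaming metric matching that calling convention; hypothetical, the real helper may be implemented differently:

def accuracy(logits, labels):
    # value_op reads the running accuracy; update_op folds in the batch.
    predictions = tf.argmax(logits, 1)
    value_op, update_op = tf.metrics.accuracy(labels=labels,
                                              predictions=predictions)
    return [update_op], value_op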
Example #36
        t1 = t1.rstrip('\r\n').decode('utf-8')
        t2 = t2.rstrip('\r\n').decode('utf-8')
        if u'sent' in t1 or not t1:
            s += 0.5
            continue
        tagged += 1
        t1, t2 = t1.split('\t'), t2.split('\t')
        if len(t1) < 3 or len(t2) < 3:
            print >> sys.stderr, t1, t2
            continue
        try:
            tag1, tag2 = ','.join(t1[2].split()[2:]), ','.join(t2[2].split()[2:])
        except:
            continue
        if args.full:
            tag1, tag2 = full(tag1), full(tag2)
        else:
            tag1, tag2 = pos(tag1), pos(tag2)
        if tag2 == 'UNKN':
            continue
        if tag1 != tag2:
#            print >> sys.stderr, tag1, tag2
            mistagged += 1
    return mistagged, tagged, s


inc = list(chain(*[get_tags_tokens_from_tab(x, withcommas=True)[2] for x in get_sentences_from_tab(args.test)]))
ref = list(chain(*[get_tags_tokens_from_tab(x, withcommas=True)[2] for x in get_sentences_from_tab(args.gold)]))
a, w = accuracy(inc, ref, verbose=True)
print a
Example #37
            test_data = util.batch_data(pickle['test'], time_batch_len = 1, 
                max_time_batches = -1, softmax = True)
    else:
        raise Exception("Other datasets not yet implemented")
        
    print config

    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model", reuse=None):
            test_model = model_class(config, training=False)

        saver = tf.train.Saver(tf.all_variables())
        model_path = os.path.join(os.path.dirname(args.config_file), 
            config.model_name)
        saver.restore(session, model_path)
        
        test_loss, test_probs = util.run_epoch(session, test_model, test_data, 
            training=False, testing=True)
        print 'Testing Loss: {}'.format(test_loss)

        if config.dataset == 'softmax':
            if args.seperate:
                nottingham_util.seperate_accuracy(test_probs, test_data, num_samples=args.num_samples)
            else:
                nottingham_util.accuracy(test_probs, test_data, num_samples=args.num_samples)

        else:
            util.accuracy(test_probs, test_data, num_samples=50)

    sys.exit(1)
Example #38
  def build_graph(self, data_paths, batch_size, graph_mod):
    """Builds generic graph for training or eval."""
    tensors = GraphReferences()
    is_training = graph_mod == GraphMod.TRAIN
    if data_paths:
      tensors.keys, tensors.examples = util.read_examples(
          data_paths,
          batch_size,
          shuffle=is_training,
          num_epochs=None if is_training else 2)
    else:
      tensors.examples = tf.placeholder(tf.string, name='input', shape=(None,))

    if graph_mod == GraphMod.PREDICT:
      inception_input, inception_embeddings = self.build_inception_graph()
      # Build the Inception graph. We later add final training layers
      # to this graph. This is currently used only for prediction.
      # For training, we use pre-processed data, so it is not needed.
      embeddings = inception_embeddings
      tensors.input_jpeg = inception_input
    else:
      # For training and evaluation we assume data is preprocessed, so the
      # inputs are tf-examples.
      # Generate placeholders for examples.
      with tf.name_scope('inputs'):
        feature_map = {
            'image_uri':
                tf.FixedLenFeature(
                    shape=[], dtype=tf.string, default_value=['']),
            # Some images may have no labels. For those, we assume a default
            # label. So the number of labels is label_count+1 for the default
            # label.
            'label':
                tf.FixedLenFeature(
                    shape=[1], dtype=tf.int64,
                    default_value=[self.label_count]),
            'embedding':
                tf.FixedLenFeature(
                    shape=[BOTTLENECK_TENSOR_SIZE], dtype=tf.float32)
        }
        parsed = tf.parse_example(tensors.examples, features=feature_map)
        labels = tf.squeeze(parsed['label'])
        uris = tf.squeeze(parsed['image_uri'])
        embeddings = parsed['embedding']

    # We assume a default label, so the total number of labels is equal to
    # label_count+1.
    all_labels_count = self.label_count + 1
    with tf.name_scope('final_ops'):
      softmax, logits = self.add_final_training_ops(
          embeddings,
          all_labels_count,
          BOTTLENECK_TENSOR_SIZE,
          dropout_keep_prob=self.dropout if is_training else None)

    # Prediction is the index of the label with the highest score. We are
    # interested only in the top score.
    prediction = tf.argmax(softmax, 1)
    tensors.predictions = [prediction, softmax, embeddings]

    if graph_mod == GraphMod.PREDICT:
      return tensors

    with tf.name_scope('evaluate'):
      loss_value = loss(logits, labels)

    # Add to the Graph the Ops that calculate and apply gradients.
    if is_training:
      tensors.train, tensors.global_step = training(loss_value)
    else:
      tensors.global_step = tf.Variable(0, name='global_step', trainable=False)

    # Add means across all batches.
    loss_updates, loss_op = util.loss(loss_value)
    accuracy_updates, accuracy_op = util.accuracy(logits, labels)

    if not is_training:
      tf.summary.scalar('accuracy', accuracy_op)
      tf.summary.scalar('loss', loss_op)

    tensors.metric_updates = loss_updates + accuracy_updates
    tensors.metric_values = [loss_op, accuracy_op]
    return tensors
Example #39
        if len(x) == max_egs:
            break
    return x, y, n_ignored

# train model
train_x, train_y, train_ignored = load_data(args.train, 
                                            max_egs=int(args.num_from_train))
x_vectorizer = CountVectorizer(binary=True)
train_x_v = x_vectorizer.fit_transform(train_x)
model = linear_model.LogisticRegression()
model.fit(train_x_v, train_y)

# sanity check model against data it was trained on
train_pred = model.predict(train_x_v)

# try against dev data
dev_x, dev_y, dev_ignored = load_data(args.dev,
                                      max_egs=int(args.num_from_dev))
dev_x_v = x_vectorizer.transform(dev_x)
dev_pred = model.predict(dev_x_v)

print "|train|", len(train_x),
print "train_ignored", train_ignored,
print "dev_ignored", dev_ignored

train_c = confusion_matrix(train_y, train_pred)
print "train confusion\n %s (%s)" % (train_c, util.accuracy(train_c))

dev_c = confusion_matrix(dev_y, dev_pred)
print "dev confusion\n %s (%s)" % (dev_c, util.accuracy(dev_c))