Example #1
    def run_warmup(self,
                   zdist,
                   states,
                   actions,
                   warm_up,
                   train=True,
                   M=None,
                   prev_alpha=None,
                   prev_read_v=None,
                   force_sharp=False):
        '''
        Run warm-up phase
        '''
        batch_size = states[0].size(0)
        prev_state = None
        h, c = self.engine.init_hidden(batch_size)
        h = utils.check_gpu(self.opts.gpu, h)
        c = utils.check_gpu(self.opts.gpu, c)

        outputs, maps, zs, alphas, alpha_logits = [], [], [], [], []
        init_maps = []

        if utils.check_arg(self.opts, 'do_memory'):
            # initialize memory and alpha
            if M is None:
                M = self.memory.init_memory(batch_size)
            if prev_alpha is None:
                prev_alpha = utils.check_gpu(
                    self.opts.gpu, torch.zeros(batch_size,
                                               self.memory.num_mem))
                mem_wh = int(math.sqrt(prev_alpha.size(1)))
                prev_alpha[:, mem_wh * (mem_wh // 2) + mem_wh // 2] = 1.0
            if prev_read_v is None:
                prev_read_v = utils.check_gpu(
                    self.opts.gpu, torch.zeros(batch_size,
                                               self.opts.memory_dim))
        alpha_losses = 0
        base_imgs_all = []
        hiddens = []
        for i in range(warm_up):
            input_state = states[i]
            (prev_state, m, prev_alpha, alpha_loss, z, M, prev_read_v, h, c,
             init_map, base_imgs, _, cur_hidden) = self.run_step(
                 input_state, h, c, actions[i], batch_size, prev_read_v,
                 prev_alpha, M, zdist, step=i, force_sharp=force_sharp)
            outputs.append(prev_state)
            maps.append(m)
            alphas.append(prev_alpha)
            alpha_losses += alpha_loss
            zs.append(z)
            base_imgs_all.append(base_imgs)
            hiddens.append(cur_hidden)
            init_maps.append(init_map)

        warm_up_state = [h, c]
        if prev_state is None:
            # warm_up is 0, so the initial screen is always used
            prev_state = states[0]
        return (prev_state, warm_up_state, M, prev_read_v, prev_alpha, outputs,
                maps, alphas, alpha_losses, zs, base_imgs_all, 0, hiddens,
                init_maps)
Example #2
def main():
    args = parser.parse_args()
    print_arguments(args)
    in_path = args.images_path
    print('cutting......')
    # in_path = 'input.png'
    cut_path = 'cut_map'
    comb_path = 'comb_map'
    outname = in_path + '_predict.png'
    (h_step, w_step, h_rest, w_rest, img_shape, img_exp_shape,
     data_list) = image_cut(in_path, cut_path)
    check_gpu(args.use_gpu)
    print('predicting......')
    infer(args, cut_path, data_list, comb_path)
    print('combining......')
    image_comb(h_step, w_step, h_rest, w_rest, img_shape, img_exp_shape,
               outname, comb_path)
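In this example (and in example #14 below) check_gpu receives the --use_gpu flag, a common PaddlePaddle idiom that aborts early when GPU execution is requested on a CPU-only build. A minimal sketch of such a helper, assuming PaddlePaddle's fluid.is_compiled_with_cuda(); the project's actual check_gpu may differ:

import sys

import paddle.fluid as fluid


def check_gpu(use_gpu):
    # Assumed behaviour: fail fast when --use_gpu is set but this PaddlePaddle
    # build has no CUDA support, instead of crashing later inside infer().
    if use_gpu and not fluid.is_compiled_with_cuda():
        print("use_gpu=True was passed, but PaddlePaddle was compiled "
              "without CUDA. Re-run with use_gpu=False or install the "
              "GPU build of PaddlePaddle.")
        sys.exit(1)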
Example #3
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--lr', type=float, default=0.1)
    arg('--n_epochs', type=int, default=5)
    arg('--batch-size', type=int, default=32)
    arg('--data_dir', type=str, default='chest_xray')
    arg('--model', type=str, default='chexnet', choices=model_list.keys())
    arg('--root', type=str, default='runs/debug', help='checkpoint root')

    args = parser.parse_args()

    train_loader = generate_trainloaders(data_dir=args.data_dir,
                                         batch_size=args.batch_size)

    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    model = model_list[args.model]
    loss = CrossEntropyLoss()

    utils.fit(
        init_optimizer=lambda lr: SGD(model.parameters(), lr=args.lr),
        args=args,
        model=model,
        train_loader=train_loader,
        criterion=loss,
        n_epochs=args.n_epochs,
        train_on_gpu=check_gpu(),
        dir_save=args.root,
        lr=args.lr,
        base_model=args.model
    )
Example #4
def run_pipeline():
    t_int = time.time()
    fprint = lambda msg: print(
        f'{msg} {"="*20} time elapsed = {(time.time()-t_int)/60:.2f} mins')
    fprint('Load train data')
    xtrain = pd.read_hdf('./data/train.h5', 'xtrain')
    xval = pd.read_hdf('./data/train.h5', 'xval')
    fprint('Load test')
    xtest = pd.read_hdf('./data/test.h5', 'xtest')
    fprint('categorizing features')
    cat_fts = [
        'city_get_first', 'platform_get_first', 'device_get_first', 'item_id',
        'location'
    ]

    categorize(xtrain, xval, cat_fts, xtest)

    fprint('reducing memory')
    reduce_numeric_mem_usage(xtrain)
    reduce_numeric_mem_usage(xval)
    reduce_numeric_mem_usage(xtest)

    xtrain.set_index('session_id', inplace=True)
    xval.set_index('session_id', inplace=True)
    xtest.set_index('session_id', inplace=True)

    fprint('Start training')
    device = 'GPU' if check_gpu() else 'CPU'
    params = {
        'iterations': 3000,
        'learning_rate': 0.02,
        'depth': 8,
        'task_type': device
    }
    clf, categorical_ind, mrr = train_model(xtrain, xval, cat_fts, params)

    fprint('Make prediction on test set')
    # pred xtest
    test_pred = clf.predict_proba(xtest.values)[:, 1]
    xtest['pred'] = test_pred
    item_mapper = np.load('./data/item_id_mapper_reverse.npy').item()
    xtest['item_id'] = xtest['item_id'].map(item_mapper)
    test_imps_pred = xtest.groupby(level=0).apply(output_impressions)
    test_imps_pred = test_imps_pred.reset_index(name='recommendation')
    test_imps_pred.to_csv('./data/test_imps_pred.csv', index=False)
    # read sub
    sub = pd.read_csv('./data/submission_popular.csv')
    sub = pd.merge(sub, test_imps_pred, how='left', on='session_id')
    sub.to_csv('./data/sub.csv', index=False)

    sub.drop('item_recommendations', axis=1, inplace=True)
    sub.rename(columns={'recommendation': 'item_recommendations'},
               inplace=True)
    sub.to_csv(f'./data/sub_mrr_{mrr:.4f}.csv', index=False)

    fprint('DONE')
Example #5
    def loading_model(self):

        print('Loading %s model' % (self.model_type))
        if self.model_type == 'C3D':
            self.model = C3D()
        elif self.model_type == 'I3D':
            self.model = I3D(num_classes=400, modality='rgb')
        else:
            self.model = P3D199(pretrained=False, num_classes=400, dropout=self.dropout)


        # Transfer classes
        self.model = transfer_model(model=self.model, model_type=self.model_type, num_classes=self.num_classes)

        # Check gpu and run parallel
        if check_gpu() > 0:
            self.model = torch.nn.DataParallel(self.model).cuda()

        # define loss function (criterion) and optimizer
        if check_gpu() > 0:
            self.criterion = nn.CrossEntropyLoss().cuda()
        else:
            self.criterion = nn.CrossEntropyLoss()

        policies = get_optim_policies(model=self.model, modality=self.modality, enable_pbn=True)

        self.optimizer = optim.SGD(policies, lr=self.lr, momentum=self.momentum, weight_decay=self.weight_decay)

        file = os.path.join(self.data_folder, 'model_best.pth.tar')
        if os.path.isfile(file):
            print("=> loading checkpoint '{}'".format('model_best.pth.tar'))

            checkpoint = torch.load(file)
            self.start_epoch = checkpoint['epoch']
            self.best_prec1 = checkpoint['best_prec1']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded model best ")
        else:
            print("=> no model best found at ")
            exit()

        cudnn.benchmark = True
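In this and several of the following examples check_gpu() is compared against zero (if check_gpu() > 0:), which suggests it returns the number of visible CUDA devices. A minimal sketch under that assumption, not the repository's verified implementation:

import torch


def check_gpu():
    # Assumed variant: number of CUDA devices, so 0 means CPU-only and
    # callers can guard GPU code with `if check_gpu() > 0:` as above.
    return torch.cuda.device_count() if torch.cuda.is_available() else 0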
Example #6
    def validate(self, logger):
        batch_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        # switch to evaluate mode
        self.model.eval()

        end = time.time()
        for i, (images, labels) in enumerate(self.val_loader):
            if check_gpu() > 0:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            image_var = torch.autograd.Variable(images)
            label_var = torch.autograd.Variable(labels)

            # compute y_pred
            y_pred = self.model(image_var)
            if self.model_type == 'I3D':
                y_pred = y_pred[0]

            loss = self.criterion(y_pred, label_var)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(y_pred.data, labels, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            acc.update(prec1.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0:
                print('TrainVal: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(self.val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(' * Accuracy {acc.avg:.3f}  Loss {loss.avg:.3f}'.format(
            acc=acc, loss=losses))
        logger.info(' * Accuracy {acc.avg:.3f}  Loss {loss.avg:.3f}'.format(
            acc=acc, loss=losses))

        return losses, acc
Example #7
def train_mnist(project_id, epoch, train_per_epoch, interval):
    check_gpu(logger)
    project_metadata = get_metadata(project_id)
    train(
        dataset=load_mnist_dataset(project_id=project_id,
                                   buffer_size=60000,
                                   batch_size=256),
        gen=build_generator_model(),
        dis=build_discriminator_model(),
        gen_opt=keras.optimizers.Adam(1e-4),
        dis_opt=keras.optimizers.Adam(1e-4),
        logger=logger,
        epochs=epoch,
        start_epoch=0,
        interval=interval,
        train_per_epoch=train_per_epoch,
        sample_size=4,
        batch_size=32,
        visualize=visualize_mnist_sample,
        project_metadata=project_metadata,
        gen_input_generator=MnistInputGenerator(feat_dim=100),
    )
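Here check_gpu(logger) appears to only report GPU availability to the logger before training starts. A minimal sketch of that behaviour, assuming TensorFlow 2.x's tf.config.list_physical_devices; the project's helper may do more (e.g. configure memory growth):

import tensorflow as tf


def check_gpu(logger):
    # Assumed behaviour: log which GPUs TensorFlow can see; returns nothing.
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        logger.info('Training on GPU(s): %s', [gpu.name for gpu in gpus])
    else:
        logger.warning('No GPU found; training will run on CPU.')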
Example #8
    def process(self):
        acc = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        losses = AverageMeter()
        log_file = os.path.join(self.data_folder, 'test.log')
        logger = Logger('test', log_file)
        # switch to evaluate mode
        self.model.eval()

        start_time = time.perf_counter()
        print("Begin testing")
        for i, (images, labels) in enumerate(self.test_loader):
            if check_gpu() > 0:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            image_var = torch.autograd.Variable(images)
            label_var = torch.autograd.Variable(labels)

            # compute y_pred
            y_pred = self.model(image_var)
            loss = self.criterion(y_pred, label_var)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(y_pred.data, labels, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            acc.update(prec1.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))

            if i % self.print_freq == 0:
                print('TestVal: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(self.test_loader),
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(
            ' * Accuracy {acc.avg:.3f}  Acc@5 {top5.avg:.3f} Loss {loss.avg:.3f}'.format(acc=acc, top5=top5,
                                                                                         loss=losses))

        end_time = time.perf_counter()
        print("Total testing time %.2gs" % (end_time - start_time))
        logger.info("Total testing time %.2gs" % (end_time - start_time))
        logger.info(
            ' * Accuracy {acc.avg:.3f}  Acc@5 {top5.avg:.3f} Loss {loss.avg:.3f}'.format(acc=acc, top5=top5,
                                                                                         loss=losses))
Example #9
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    #arg('--model_path',type = str, default='checkpoints/finetune_chexnet_0.01_3')
    arg('--data_dir', type=str, default='chest_xray')
    arg('--batch-size', type=int, default=32)
    arg('--model_path', type=str, default='runs/debug/chexnet_0.1_5')

    args = parser.parse_args()

    model = utils.get_model(str(Path(args.model_path)))
    test_loader = generate_testloader(data_dir=args.data_dir,
                                      batch_size=args.batch_size)
    y_pred, y_test = utils.predict(model=model,
                                   testloader=test_loader,
                                   train_on_gpu=check_gpu())
    print(y_pred)
    print(y_test)
Example #10
def predict_image(model, image_path):
    model.eval()
    train_on_gpu = utils.check_gpu()
    print("Prediction in progress")
    image = Image.open(image_path).convert('RGB')

    # Define the transformations for the image (note that ImageNet models are
    # trained with an image size of 224)

    transformation = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Preprocess the image
    image_tensor = transformation(image).float()

    # Add an extra batch dimension since pytorch treats all images as batches
    image_tensor = image_tensor.unsqueeze_(0)

    # Turn the input into a Variable
    input = Variable(image_tensor)
    if torch.cuda.is_available():
        input = input.cuda()

    # Predict the class of the image
    output = model(input)

    _, predicted = torch.max(output.data, 1)
    if train_on_gpu:
        all_predicted = predicted.cpu().numpy().tolist()
        #y_test += labels.cpu().numpy().tolist()
    else:
        all_predicted = predicted.numpy().tolist()
        #y_test += labels.numpy().tolist()
    return all_predicted[0]
Example #11
    def run_step(self, state, h, c, action, batch_size, prev_read_v, prev_alpha, M, zdist,
                read_only=False, step=0, decode=True, play=False, force_sharp=False):
        '''
        Run the model one time step
        '''

        # encode the image input
        if self.opts.input_detach:
            state = state.detach()
        s = self.simple_enc(state)

        # sample a noise
        z = utils.check_gpu(self.opts.gpu, zdist.sample((batch_size,)))

        # run dynamics engine
        prev_hidden = h[0].clone()
        h, c, cur_hidden = self.engine(h, c, s, action, z, prev_read_v=prev_read_v, step=step)

        # run memory module
        if self.opts.do_memory:
            base, M, alpha, prev_read_v = self.memory(cur_hidden, action, prev_hidden, prev_alpha, M, c=c[0], read_only=read_only, force_sharp=force_sharp)
            prev_alpha = alpha
            bases = base
        else:
            base = cur_hidden
            bases = [base] * self.num_components

        # run the rendering engine
        alpha_loss = 0
        out, m, eloss, init_maps, base_imgs = self.graphics_renderer(bases, num_components=self.num_components)
        if utils.check_arg(self.opts, 'alpha_loss_multiplier') and self.opts.alpha_loss_multiplier > 0:
            # memory regularization
            for i in range(1, len(m)):
                alpha_loss += (m[i].abs().sum() / batch_size)

        prev_state = out
        return prev_state, m, prev_alpha, alpha_loss, z, M, prev_read_v, h, c, init_maps, base_imgs, 0, cur_hidden
Example #12
    def loading_model(self):

        print('Loading %s model' % (self.model_type))

        if self.model_type == 'C3D':
            self.model = C3D()
            if self.pretrained:
                self.model.load_state_dict(torch.load('c3d.pickle'))
        elif self.model_type == 'I3D':
            if self.pretrained:
                self.model = I3D(num_classes=400, modality='rgb')
                self.model.load_state_dict(
                    torch.load('kinetics_i3d_model_rgb.pth'))
            else:
                self.model = I3D(num_classes=self.num_classes, modality='rgb')
        else:
            if self.pretrained:
                print("=> using pre-trained model")
                self.model = P3D199(pretrained=True,
                                    num_classes=400,
                                    dropout=self.dropout)

            else:
                print("=> creating model P3D")
                self.model = P3D199(pretrained=False,
                                    num_classes=400,
                                    dropout=self.dropout)
        # Transfer classes
        self.model = transfer_model(model=self.model,
                                    model_type=self.model_type,
                                    num_classes=self.num_classes)

        # Check gpu and run parallel
        if check_gpu() > 0:
            self.model = torch.nn.DataParallel(self.model).cuda()

        # define loss function (criterion) and optimizer
        self.criterion = nn.CrossEntropyLoss()

        if check_gpu() > 0:
            self.criterion = nn.CrossEntropyLoss().cuda()

        params = self.model.parameters()
        if self.model_type == 'P3D':
            params = get_optim_policies(model=self.model,
                                        modality=self.modality,
                                        enable_pbn=True)

        self.optimizer = optim.SGD(params=params,
                                   lr=self.lr,
                                   momentum=self.momentum,
                                   weight_decay=self.weight_decay)

        # self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=self.optimizer, mode='min', patience=10, verbose=True)

        # optionally resume from a checkpoint
        if self.resume:
            if os.path.isfile(self.resume):
                print("=> loading checkpoint '{}'".format(self.resume))
                checkpoint = torch.load(self.resume)
                self.start_epoch = checkpoint['epoch']
                self.best_prec1 = checkpoint['best_prec1']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    self.evaluate, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.resume))

        if self.evaluate:
            file_model_best = os.path.join(self.data_folder,
                                           'model_best.pth.tar')
            if os.path.isfile(file_model_best):
                print(
                    "=> loading checkpoint '{}'".format('model_best.pth.tar'))
                checkpoint = torch.load(file_model_best)
                self.start_epoch = checkpoint['epoch']
                self.best_prec1 = checkpoint['best_prec1']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    self.evaluate, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.resume))

        cudnn.benchmark = True
Example #13
##################################################################
##################################################################

##########################     MAIN     ##########################

##################################################################
##################################################################

# Saving settings
model_dir = os.path.join(opt.checkpoint_path, opt.name)
if not os.path.isdir(model_dir):
    os.mkdir(model_dir)
saver = Saver(model_dir, args=opt)

# Define model and optimiser
gpu = utils.check_gpu()
device = torch.device(
    "cuda:{}".format(gpu) if torch.cuda.is_available() else "cpu")
model = SegNet(sigma=opt.sigma).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Recover weights, if required
if opt.recover:
    ckpt_file = os.path.join(model_dir, opt.reco_type + '_weights.pth')
    ckpt = torch.load(ckpt_file, map_location=device)
    model.load_state_dict(ckpt['model_state_dict'])
    epoch = ckpt['iter_nb'] + 1
    print('Model recovered from {}.'.format(ckpt_file))
    if 'optimizer_state_dict' in ckpt:
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        print('Optimizer recovered from {}.'.format(ckpt_file))
Example #14
def main():
    args = parser.parse_args()
    print_arguments(args)
    check_gpu(args.use_gpu)
    eval(args)
Example #15
    def init_memory(self, bs):
        memory = self.mem_bias.clone().repeat(bs, 1, 1)
        return utils.check_gpu(self.opts.gpu, memory)
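Examples #1, #15, and #17 call utils.check_gpu(self.opts.gpu, tensor) and use the return value as a tensor, so this variant apparently moves its argument onto the configured device. A minimal sketch under that assumption, treating gpu as a CUDA device index with a negative value (or None) meaning CPU:

import torch


def check_gpu(gpu, tensor):
    # Assumed helper: place the tensor on cuda:<gpu> when the index is valid,
    # otherwise leave it on the CPU and return it unchanged.
    if gpu is not None and gpu >= 0 and torch.cuda.is_available():
        return tensor.cuda(gpu)
    return tensor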
Example #16
#from torch.autograd import Variable
#from PIL import Image
#import torch

from torch.optim import SGD
from torch.nn import CrossEntropyLoss

import torch
import utils
from prepare_train_test_val import generate_testloader
from models import chexnet, resnet
from utils import predict, check_gpu
import argparse
from pathlib import Path

train_on_gpu = check_gpu()

if not train_on_gpu:
    print('CUDA is not available for testing. Testing on CPU...')
else:
    print('CUDA is available for testing. Testing on GPU...')


def get_model(model_path):
    model = chexnet()
    state = torch.load(str(model_path))
    state = {
        key.replace('module.', ''): value
        for key, value in state['model'].items()
    }
    model.load_state_dict(state)
    return model
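In this example check_gpu() feeds the boolean train_on_gpu flag, so the simplest consistent reading is a thin wrapper around CUDA availability. A one-line sketch under that assumption (the real utils.check_gpu may differ):

import torch


def check_gpu():
    # Assumed implementation: True when CUDA is available for training/testing.
    return torch.cuda.is_available()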
Example #17
    def forward(self,
                h,
                a,
                prev_h,
                prev_alpha,
                M,
                c=None,
                read_only=False,
                play=False,
                force_sharp=False):
        bs = a.size(0)
        if self.opts.mem_use_h:
            memory_q_input = h
        else:
            h_norm = F.normalize(h, dim=1)
            prev_h_norm = F.normalize(prev_h, dim=1)

            memory_q_input = h_norm - prev_h_norm

        kernels = self.get_kernel(a).view(-1, 1, 3, 3)

        # flip kernels (e.g. the kernel for Left == the flipped kernel for Right)
        new_a = a.cpu().numpy()
        _, action_label = torch.max(a, 1)
        action_label = action_label.long().cpu().numpy()
        mask = np.zeros((bs, 1))
        for i in range(bs):
            if 'pacman' in self.opts.data:
                if action_label[i] == 2:
                    new_a[i][1] = 1.0
                    new_a[i][2] = 0.0
                    mask[i][0] = 1.0
                elif action_label[i] == 4:
                    new_a[i][3] = 1.0
                    new_a[i][4] = 0.0
                    mask[i][0] = 1.0
            elif 'vizdoom' in self.opts.data:
                if action_label[i] == 0:
                    new_a[i][1] = 1.0
                    new_a[i][0] = 0.0
                    mask[i][0] = 1.0
        mask = utils.check_gpu(self.opts.gpu,
                               torch.FloatTensor(mask)).view(-1, 1, 1, 1)
        new_a = utils.check_gpu(self.opts.gpu, torch.FloatTensor(new_a))

        flipped_kernels = torch.flip(
            self.get_kernel(new_a).view(-1, 1, 3, 3), [2, 3])
        kernels = (1 - mask) * kernels + mask * flipped_kernels
        if self.opts.softmax_kernel:
            if force_sharp:
                tmp = torch.zeros_like(kernels.view(bs, -1))
                tmp[0][kernels.view(bs, -1).max(1)[1]] = 1.0
                kernels = tmp
            else:
                kernels = F.softmax(kernels.view(bs, -1) / self.opts.alpha_T,
                                    dim=1)
            kernels = kernels.view(bs, 1, 3, 3)

        gate = self.get_gate(memory_q_input)
        if force_sharp:
            if gate[0] > 0.5:
                gate = torch.ones_like(gate)
            else:
                gate = torch.zeros_like(gate)
        mem_h = int(math.sqrt(self.num_mem))
        alpha = self.conv2d_with_kernel(prev_alpha.view(bs, 1, mem_h, mem_h),
                                        kernels,
                                        v_dim=1)
        alpha = alpha.view(bs, -1)
        alpha = alpha * gate + prev_alpha * (1 - gate)
        if force_sharp:
            tmp = torch.zeros_like(alpha)
            tmp[0][alpha.view(bs, -1).max(1)[1]] = 1.0
            alpha = tmp

        if not read_only:
            tmp = self.get_vars_h(h)
            erase_v = tmp[:, :self.opts.memory_dim]
            add_v = tmp[:, 1 * self.opts.memory_dim:2 * self.opts.memory_dim]
            other_v = tmp[:, 2 * self.opts.memory_dim:]

            erase_v = torch.sigmoid(erase_v)
            M = self.write(erase_v, add_v, alpha, M)

        read_v, block_read_v = self.read(alpha, M)
        final_h = [read_v, other_v]

        return final_h, M, alpha, read_v
Example #18
def plot_similarity(labels, features, rotation):
    print("Calculating inner product of embedding vectors")
    corr = np.inner(features, features)
    print("Plotting...")
    sns.set(font_scale=1.2)
    g = sns.heatmap(
      corr,
      xticklabels=labels,
      yticklabels=labels,
      vmin=0,
      vmax=1,
      cmap="YlOrRd")
    g.set_xticklabels(labels, rotation=rotation)
    g.set_title("Semantic Textual Similarity")

check_gpu()

# module_url = 'https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/1'
module_url = 'https://tfhub.dev/google/universal-sentence-encoder/2'
tf_hub_embedder = hub.Module(module_url)

# Hyperparameter search for number of clusters for k-means
n_clusters = [5, 10, 12, 14, 16]
clustering_method = 'kmeans'

logger.info("Loading data")
files = glob.glob('tweets/cdnpoli_*.csv')
df = pd.read_csv(files[0])

for file in files[1:]:
    df_tmp = pd.read_csv(file)
Example #19
    def loading_model(self):

        print('Loading %s model' % (self.model_type))
        pretrained = None
        if self.pretrained:
            pretrained = 'imagenet'

        if self.model_type == 'inceptionv4':
            self.model = inceptionv4(num_classes=1000, pretrained=pretrained)
            if self.pretrained:
                num_ftrs = self.model.last_linear.in_features
                self.model.last_linear = nn.Linear(num_ftrs, self.num_classes)
                # freeze all layers:
                for _, param in self.model.named_parameters():
                    param.requires_grad = False
                # unfreeze last layers:
                ct = []
                for name, child in self.model.features.named_children():
                    if "4" in ct:
                        for param in child.parameters():
                            param.requires_grad = True
                    ct.append(name)

            else:
                num_ftrs = self.model.last_linear.in_features
                self.model.last_linear = nn.Linear(num_ftrs, self.num_classes)
        else:
            print('no model')
            exit()

        # Check gpu and run parallel
        if check_gpu() > 0:
            self.model = torch.nn.DataParallel(self.model).cuda()
            # self.model.cuda()

        # define loss function (criterion) and optimizer
        self.criterion = nn.CrossEntropyLoss()
        if check_gpu() > 0:
            self.criterion = nn.CrossEntropyLoss().cuda()

        params = list(
            filter(lambda p: p.requires_grad, self.model.parameters()))
        self.optimizer = optim.SGD(params=params,
                                   lr=self.lr,
                                   momentum=self.momentum,
                                   weight_decay=self.weight_decay)

        file = os.path.join(self.data_folder, 'model_best.pth.tar')
        if os.path.isfile(file):
            print("=> loading checkpoint '{}'".format('model_best.pth.tar'))

            checkpoint = torch.load(file)
            self.start_epoch = checkpoint['epoch']
            self.best_prec1 = checkpoint['best_prec1']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model")
        else:
            print("=> no model best found at ")
            exit()

        cudnn.benchmark = True
Example #20
    def loading_model(self):

        print('Loading %s model' % (self.model_type))
        pretrained = None
        if self.pretrained:
            pretrained = 'imagenet'

        if self.model_type == 'inceptionv4':
            self.model = inceptionv4(num_classes=1000, pretrained=pretrained)
            if self.pretrained:
                num_ftrs = self.model.last_linear.in_features
                self.model.last_linear = nn.Linear(num_ftrs, self.num_classes)
                # freeze all layers:
                for _, param in self.model.named_parameters():
                    param.requires_grad = False
                # unfreeze last layers:
                ct = []
                for name, child in self.model.features.named_children():
                    if "4" in ct:
                        for param in child.parameters():
                            param.requires_grad = True
                    ct.append(name)

            else:
                num_ftrs = self.model.last_linear.in_features
                self.model.last_linear = nn.Linear(num_ftrs, self.num_classes)

        elif self.model_type == 'iresetv2':
            self.model = inceptionresnetv2(num_classes=self.num_classes,
                                           pretrained=pretrained)
        else:
            print('no model')
            exit()

        cudnn.benchmark = True
        # Check gpu and run parallel
        if check_gpu() > 0:
            self.model = torch.nn.DataParallel(self.model).cuda()
            # self.model.cuda()
        # define loss function (criterion) and optimizer
        self.criterion = nn.CrossEntropyLoss()
        if check_gpu() > 0:
            self.criterion = nn.CrossEntropyLoss().cuda()

        params = self.model.parameters()
        if self.pretrained:
            params = list(
                filter(lambda p: p.requires_grad, self.model.parameters()))

        self.optimizer = optim.SGD(params=params,
                                   lr=self.lr,
                                   momentum=self.momentum,
                                   weight_decay=self.weight_decay)
        # self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=self.optimizer, mode='min', factor=0.1,
        #                                                       patience=10, verbose=True, min_lr=0)
        self.scheduler = optim.lr_scheduler.StepLR(optimizer=self.optimizer,
                                                   step_size=10,
                                                   gamma=0.1)
        # self.optimizer = optim.Adam(params, lr=self.lr)
        # optionally resume from a checkpoint
        if self.resume:
            if os.path.isfile(self.resume):
                print("=> loading checkpoint '{}'".format(self.resume))
                checkpoint = torch.load(self.resume)
                self.start_epoch = checkpoint['epoch']
                self.best_prec1 = checkpoint['best_prec1']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint (epoch {})".format(
                    checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.resume))

        if self.evaluate:
            file_model_best = os.path.join(self.data_folder,
                                           'model_best.pth.tar')
            if os.path.isfile(file_model_best):
                print(
                    "=> loading checkpoint '{}'".format('model_best.pth.tar'))
                checkpoint = torch.load(file_model_best)
                self.start_epoch = checkpoint['epoch']
                self.best_prec1 = checkpoint['best_prec1']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint (epoch {})".format(
                    checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.resume))

        cudnn.benchmark = True
Example #21
    logits = []
    for batch in tqdm(test_dataloader):
        b_input_ids, b_attn_mask = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            logits.append(model(b_input_ids, b_attn_mask))
    
    logits = torch.cat(logits, dim=0)
    prob = F.softmax(logits, dim=1).cpu().numpy()

    return prob

###############################################################################

if __name__ == "__main__":
    set_seed()
    device = check_gpu()

    
    print("\nPreparing train data...")
    d = load_data("/home/jessica/data/SciCite/train.jsonl")
    data = pd.DataFrame.from_dict(d).T
    data = data.loc[:, ["string", "label"]] 
    
    # replace categorical labels with numbers
    label_map = {"background": 0, "method": 1, "result": 2}
    data["label"].replace(label_map, inplace=True)
    
    # train-valid split
    X_train, X_valid, y_train, y_valid = train_test_split(
        data.string, data.label, test_size=0.3
    )
Example #22
    def train(self, logger, epoch):
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        rate = get_learning_rate(self.optimizer)[0]
        # switch to train mode
        self.model.train()

        end = time.time()
        for i, (images, target) in enumerate(self.train_loader):
            # adjust learning rate scheduler step
            self.scheduler.batch_step()

            # measure data loading time
            data_time.update(time.time() - end)
            if check_gpu() > 0:
                images = images.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)
            image_var = torch.autograd.Variable(images)
            label_var = torch.autograd.Variable(target)

            self.optimizer.zero_grad()

            # compute y_pred
            y_pred = self.model(image_var)
            if self.model_type == 'I3D':
                y_pred = y_pred[0]

            loss = self.criterion(y_pred, label_var)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(y_pred.data, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            acc.update(prec1.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))
            # compute gradient and do SGD step

            loss.backward()
            self.optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0:
                print('Epoch: [{0}/{1}][{2}/{3}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Lr {rate:.5f}\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch,
                          self.epochs,
                          i,
                          len(self.train_loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          rate=rate,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        logger.info('Epoch: [{0}/{1}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Lr {rate:.5f}\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                    'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                        epoch,
                        self.epochs,
                        batch_time=batch_time,
                        data_time=data_time,
                        rate=rate,
                        loss=losses,
                        top1=top1,
                        top5=top5))
        return losses, acc
Example #23
    def process(self):
        acc = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        losses = AverageMeter()
        log_file = os.path.join(self.data_folder, 'test.log')
        logger = Logger('test', log_file)
        # switch to evaluate mode
        self.model.eval()

        start_time = time.perf_counter()
        print("Begin testing")
        predicted, probs = [], []
        for i, (images, labels) in enumerate(self.test_loader):

            if check_gpu() > 0:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)
            images = torch.autograd.Variable(images)
            labels = torch.autograd.Variable(labels)

            if self.tencrop:
                # Due to ten-cropping, input batch is a 5D Tensor
                batch_size, number_of_crops, number_of_channels, height, width = images.size(
                )

                # Fuse batch size and crops
                images = images.view(-1, number_of_channels, height, width)

                # Compute model output
                output_batch_crops = self.model(images)

                # Average predictions for each set of crops
                output_batch = output_batch_crops.view(batch_size,
                                                       number_of_crops,
                                                       -1).mean(1)
                label_repeated = labels.repeat(10, 1).transpose(
                    1, 0).contiguous().view(-1, 1).squeeze()
                loss = self.criterion(output_batch_crops, label_repeated)
            else:
                output_batch = self.model(images)
                loss = self.criterion(output_batch, labels)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output_batch.data, labels, topk=(1, 5))
            #     print(prec1, prec5)
            losses.update(loss.item(), images.size(0))
            acc.update(prec1.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))

            if i % self.print_freq == 0:
                print('TestVal: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(self.test_loader),
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(
            ' * Accuracy {acc.avg:.3f}  Acc@5 {top5.avg:.3f} Loss {loss.avg:.3f}'
            .format(acc=acc, top5=top5, loss=losses))

        end_time = time.perf_counter()
        print("Total testing time %.2gs" % (end_time - start_time))
        logger.info("Total testing time %.2gs" % (end_time - start_time))
        logger.info(
            ' * Accuracy {acc.avg:.3f}  Acc@5 {top5.avg:.3f} Loss {loss.avg:.3f}'
            .format(acc=acc, top5=top5, loss=losses))
Example #24
    def validate(self, logger):
        batch_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        # switch to evaluate mode
        self.model.eval()

        end = time.time()
        for i, (images, labels) in enumerate(self.val_loader):
            if check_gpu() > 0:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)
            images = torch.autograd.Variable(images)
            labels = torch.autograd.Variable(labels)

            if self.tencrop:
                # Due to ten-cropping, input batch is a 5D Tensor
                batch_size, number_of_crops, number_of_channels, height, width = images.size(
                )

                # Fuse batch size and crops
                images = images.view(-1, number_of_channels, height, width)

                # Compute model output
                output_batch_crops = self.model(images)

                # Average predictions for each set of crops
                output_batch = output_batch_crops.view(batch_size,
                                                       number_of_crops,
                                                       -1).mean(1)
                label_repeated = labels.repeat(10, 1).transpose(
                    1, 0).contiguous().view(-1, 1).squeeze()
                loss = self.criterion(output_batch_crops, label_repeated)
            else:
                output_batch = self.model(images)
                loss = self.criterion(output_batch, labels)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output_batch.data, labels, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            acc.update(prec1.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0:
                print('TrainVal: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(self.val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(' * Accuracy {acc.avg:.3f}  Loss {loss.avg:.3f}'.format(
            acc=acc, loss=losses))
        logger.info(' * Accuracy {acc.avg:.3f}  Loss {loss.avg:.3f}'.format(
            acc=acc, loss=losses))

        return losses, acc