Example #1
    def __init__(self, zone, config):

        # Initialises the source/destination of the log messages
        self.data_source_path = config['rabbitmq_input']['dir']
        self.encapsulate_dest_path = zone + config['encalupated_out'][
            'dir'] + '/'
        self.data_source = config['rabbitmq_input']['type']
        mkdir_p(self.encapsulate_dest_path)  # create the output directory
        self.n_mprocessing = config['processing']['ncores_per_zone']

        self.part = 0  # counter for the number of file parts

        rep_tokens = {
            '\\': '',
            '"{': '{',
            '}"': '}',
            '\'{': '{',
            '}\'': '}',
            '{}': '"NA"'
        }
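        # escape each literal token and join them into one alternation pattern so that
        # all replacements can be applied to a log message in a single regex pass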
        self.rep_tokens = dict(
            (re.escape(k), v) for k, v in rep_tokens.items())
        self.pattern_tokens = re.compile("|".join(self.rep_tokens.keys()))

        rep_for_exchange = r'"oslo.message"'
        self.pattern_for_exchange = re.compile(rep_for_exchange)

        if self.data_source != 'File':
            raise ValueError("Data source other than File is not implemented")
        if self.data_source == 'File':
            self.files_to_load = None
        # NOTE(ab981s) change number of processors to modify the processing speed
        self.pool = Pool(self.n_mprocessing)

        LOG.debug("Completed initialization ")
Example #2
                    default=0.1,
                    type=float,
                    help='learning rate decay factor')  # works for MNIST
parser.add_argument('--stage2_lr_step',
                    default=6,
                    type=float,
                    help='learning rate decay step')  # works for MNIST
parser.add_argument('--stage2_bs', default=128, type=int, help='batch size')

args = parser.parse_args()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
args.checkpoint = './checkpoints/mnist/%s-%s-%s-dim%s-T%s-alpha%s' % (
    args.train_class_num, args.test_class_num, args.arch, args.embed_dim,
    args.temperature, args.alpha)
if not os.path.isdir(args.checkpoint):
    mkdir_p(args.checkpoint)

# folder to save figures
args.plotfolder = os.path.join(args.checkpoint, "plotter")
if not os.path.isdir(args.plotfolder):
    mkdir_p(args.plotfolder)
# folder to save histogram
args.histfolder = os.path.join(args.checkpoint, "histogram")
if not os.path.isdir(args.histfolder):
    mkdir_p(args.histfolder)

print('==> Preparing data..')
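# normalize MNIST with the commonly used per-channel mean/std (0.1307, 0.3081)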
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307, ), (0.3081, ))])
trainset = MNIST(root='../../data',
Example #3
parser.add_argument('--hist_bins',
                    default=100,
                    type=int,
                    help='divided into n bins')
parser.add_argument('--hist_norm',
                    default=True,
                    action='store_true',
                    help='if norm the frequency to [0,1]')

args = parser.parse_args()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
args.checkpoint = './checkpoints/mnist/%s_%s_%s_dim%s_gamma%s' % (
    args.train_class_num, args.test_class_num, args.arch, args.embed_dim,
    args.gamma)
if not os.path.isdir(args.checkpoint):
    mkdir_p(args.checkpoint)

print('==> Preparing data..')
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307, ), (0.3081, ))])
trainset = MNIST(root='../../data',
                 train=True,
                 download=True,
                 transform=transform,
                 train_class_num=args.train_class_num,
                 test_class_num=args.test_class_num,
                 includes_all_train_class=args.includes_all_train_class)
testset = MNIST(root='../../data',
                train=False,
                download=True,
Example #4
parser.add_argument('--plot_quality', default=200, type=int, help='DPI of plot figure')
parser.add_argument('--bins', default=50, type=int, help='divided into n bins')
parser.add_argument('--tail_number', default=50, type=int,
                    help='number of the largest distances to ignore, '
                         'as they may be anomalies or wrongly labeled samples.')

args = parser.parse_args()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
args.checkpoint = './checkpoints/mnist/' + args.arch + \
                  '/A%s_B%s_embed%s' % (args.alpha, args.beta, args.embed_dim)
if not os.path.isdir(args.checkpoint):
    mkdir_p(args.checkpoint)

# folder to save figures
args.plotfolder1 = os.path.join(args.checkpoint, "plotter_Stage1")
if not os.path.isdir(args.plotfolder1):
    mkdir_p(args.plotfolder1)
# folder to save figures
args.plotfolder2 = os.path.join(args.checkpoint, "plotter_Stage2")
if not os.path.isdir(args.plotfolder2):
    mkdir_p(args.plotfolder2)

print('==> Preparing data..')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
Example #5
parser.add_argument('--distance', default='l2', choices=['l2', 'l1', 'dotproduct'],
                    type=str, help='choosing distance metric')
parser.add_argument('--scaled', default=True, action='store_true',
                    help='If scale distance by sqrt(embed_dim)')

# Parameters for stage 1
parser.add_argument('--stage1_resume', default='', type=str, metavar='PATH', help='path to latest checkpoint')
parser.add_argument('--bins', default=20, type=int, help='divided into n bins')

# Parameters for plotting

args = parser.parse_args()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
args.distance_folder = './checkpoints/mnist/' + args.arch + '/distance_%s_%s' % (args.alpha, args.beta)
if not os.path.isdir(args.distance_folder):
    mkdir_p(args.distance_folder)

print('==> Preparing data..')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

trainset = MNIST(root='../../data', train=True, download=True, transform=transform,
                 train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                 includes_all_train_class=args.includes_all_train_class)

testset = MNIST(root='../../data', train=False, download=True, transform=transform,
                train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                includes_all_train_class=args.includes_all_train_class)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bs, shuffle=True, num_workers=4)
Example #6
def main():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    # checkpoint
    args.checkpoint = './checkpoints/cifar/' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = CIFAR100(root='../../data', train=True, download=True, transform=transform_train,
                        train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                        includes_all_train_class=args.includes_all_train_class)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bs, shuffle=True, num_workers=4)
    testset = CIFAR100(root='../../data', train=False, download=True, transform=transform_test,
                       train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                       includes_all_train_class=args.includes_all_train_class)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.bs, shuffle=False, num_workers=4)


    # Model
    print('==> Building model..')
    net = models.__dict__[args.arch](num_classes=args.train_class_num) # CIFAR 100
    net = net.to(device)

    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            # best_acc = checkpoint['acc']
            # print("BEST_ACCURACY: "+str(best_acc))
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'Learning Rate', 'Train Loss', 'Train Acc.', 'Test Loss', 'Test Acc.'])

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

    # test(0, net, trainloader, testloader, criterion, device)
    epoch = 0
    if not args.evaluate:
        for epoch in range(start_epoch, start_epoch + args.es):
            print('\nEpoch: %d   Learning rate: %f' % (epoch+1, optimizer.param_groups[0]['lr']))
            adjust_learning_rate(optimizer, epoch, args.lr)
            train_loss, train_acc = train(net, trainloader, optimizer, criterion, device)
            save_model(net, None, epoch, os.path.join(args.checkpoint, 'last_model.pth'))
            test_loss, test_acc = 0, 0
            logger.append([epoch+1, optimizer.param_groups[0]['lr'], train_loss, train_acc, test_loss, test_acc])

    test(epoch, net, trainloader, testloader, criterion, device)
    logger.close()
Example #7
def main():
    global best_prec1, args

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
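        # one process per GPU: pin this rank to its device, then join the NCCL process group
        # via the env:// rendezvous (MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE come from the launcher)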
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    if not os.path.isdir(args.checkpoint) and args.local_rank == 0:
        mkdir_p(args.checkpoint)

    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if args.static_loss_scale != 1.0:
        if not args.fp16:
            print("Warning:  if --fp16 is not used, static_loss_scale will be ignored.")

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = BuildNet(backbone=args.arch, num_classes=args.train_class_num)


    model = model.cuda()
    if args.fp16:
        model = network_to_half(model)
    if args.distributed:
        # shared param/delay all reduce turns off bucketing in DDP, for lower latency runs this can improve perf
        # for the older version of APEX please use shared_param, for newer one it is delay_allreduce
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    # equals to psoftmax if input is ["normweight_fea2cen"]
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.fp16:
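        # APEX's FP16_Optimizer keeps FP32 master weights and applies static or dynamic
        # loss scaling around the wrapped optimizer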
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale,
                                   verbose=False)

    # optionally resume from a checkpoint
    title = 'ImageNet-' + args.arch
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            try:
                model.load_state_dict(checkpoint['state_dict'])
            except:
                from collections import OrderedDict
                new_check_point = OrderedDict()
                for k, v in checkpoint['state_dict'].items():
                    name = k[7:]  # remove `module.`
                    new_check_point[name] = v
                model.load_state_dict(new_check_point)
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
            logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.', 'Valid Top5.'])

    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, args.val)


    crop_size = 224
    val_size = 256

    # pipe = HybridTrainPipe(batch_size=args.batch_size, num_threads=args.workers, device_id=args.local_rank, data_dir=traindir, crop=crop_size, dali_cpu=args.dali_cpu)
    # pipe.build()
    # train_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    pipe = HybridValPipe(batch_size=args.batch_size, num_threads=args.workers, device_id=args.local_rank, data_dir=valdir, crop=crop_size, size=val_size)
    pipe.build()
    val_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))
    validate(val_loader, model)
Example #8
def main():
    global best_prec1, args

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    if not os.path.isdir(args.checkpoint) and args.local_rank == 0:
        mkdir_p(args.checkpoint)

    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if args.static_loss_scale != 1.0:
        if not args.fp16:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = Network(backbone=args.arch, num_classes=args.train_class_num)

    model = model.cuda()
    if args.fp16:
        model = network_to_half(model)
    if args.distributed:
        # shared param/delay all reduce turns off bucketing in DDP, for lower latency runs this can improve perf
        # for the older version of APEX please use shared_param, for newer one it is delay_allreduce
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.fp16:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale,
                                   verbose=False)

    # optionally resume from a checkpoint
    title = 'ImageNet-' + args.arch
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            if args.local_rank == 0:
                logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                                title=title,
                                resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            title=title)
            logger.set_names([
                'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
                'Valid Acc.', 'Valid Top5.'
            ])

    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    if (args.arch == "inception_v3"):
        crop_size = 299
        val_size = 320  # I chose this value arbitrarily, we can adjust.
    else:
        crop_size = 224
        val_size = 256

    pipe = HybridTrainPipe(batch_size=args.batch_size,
                           num_threads=args.workers,
                           device_id=args.local_rank,
                           data_dir=traindir,
                           crop=crop_size,
                           dali_cpu=args.dali_cpu)
    pipe.build()
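    # each rank iterates over its own shard: epoch_size("Reader") / world_size samples per epoch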
    train_loader = DALIClassificationIterator(
        pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    # pipe = HybridValPipe(batch_size=args.batch_size, num_threads=args.workers, device_id=args.local_rank, data_dir=valdir, crop=crop_size, size=val_size)
    # pipe.build()
    # val_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    # if args.evaluate:
    #     validate(val_loader, model, criterion)
    #     return

    total_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        adjust_learning_rate(optimizer, epoch, args)

        if args.local_rank == 0:
            print('\nEpoch: [%d | %d] LR: %f' %
                  (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

        [train_loss, train_acc,
         avg_train_time] = train(train_loader, model, criterion, optimizer,
                                 epoch)
        total_time.update(avg_train_time)
        # evaluate on validation set
        # [test_loss, prec1, prec5] = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        if args.local_rank == 0:
            # append logger file
            # logger.append([optimizer.param_groups[0]['lr'], train_loss, test_loss, train_acc, prec1, prec5])
            logger.append([
                optimizer.param_groups[0]['lr'], train_loss, 0.0, train_acc,
                0.0, 0.0
            ])

            # is_best = prec1 > best_prec1
            is_best = False
            # best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                checkpoint=args.checkpoint,
                filename="checkpoint.pth.tar")
            # if epoch == args.epochs - 1:
            #     print('##Top-1 {0}\n'
            #           '##Top-5 {1}\n'
            #           '##Perf  {2}'.format(prec1, prec5, args.total_batch_size / total_time.avg))

        # reset DALI iterators
        train_loader.reset()
        # val_loader.reset()

    if args.local_rank == 0:
        logger.close()
Example #9
def main():
    args.checkpoint = './checkpoints/mnist/' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # folder to save figures
    args.plotfolder = './checkpoints/mnist/' + args.arch + '/plotter'
    if not os.path.isdir(args.plotfolder):
        mkdir_p(args.plotfolder)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    print('==> Preparing data..')
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    trainset = MNIST(root='../../data', train=True, download=True, transform=transform,
                     train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                     includes_all_train_class=args.includes_all_train_class)

    testset = MNIST(root='../../data', train=False, download=True, transform=transform,
                    train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                    includes_all_train_class=args.includes_all_train_class)

    # data loader
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bs, shuffle=True, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.bs, shuffle=False, num_workers=4)

    print('==> Building model..')
    net = Network(backbone=args.arch, num_classes=args.train_class_num, embed_dim=args.embed_dim)
    fea_dim = net.classifier.in_features
    net = net.to(device)

    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    criterion_softmax = nn.CrossEntropyLoss()
    criterion_centerloss = CenterLoss(num_classes=args.train_class_num, feat_dim=fea_dim).to(device)
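    # the center loss carries its own learnable class centers, so it is trained with a
    # separate SGD optimizer (optimizer_centerloss) alongside the one for the network weights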
    optimizer_softmax = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    optimizer_centerloss = torch.optim.SGD(criterion_centerloss.parameters(), lr=args.center_lr, momentum=0.9,
                                           weight_decay=5e-4)

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            criterion_centerloss.load_state_dict(checkpoint['centerloss'])
            # best_acc = checkpoint['acc']
            # print("BEST_ACCURACY: "+str(best_acc))
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'Total Loss', 'Softmax Loss', 'Center Loss', 'Train Acc.'])


    if not args.evaluate:
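        # step-decay the softmax learning rate by 10x every 20 epochs; the center-loss LR stays fixed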
        scheduler = lr_scheduler.StepLR(optimizer_softmax, step_size=20, gamma=0.1)
        for epoch in range(start_epoch, start_epoch + args.es):
            print('\nEpoch: %d   Learning rate: %f' % (epoch + 1, optimizer_softmax.param_groups[0]['lr']))
            train_loss, softmax_loss, center_loss, train_acc = train(net, trainloader, optimizer_softmax,
                                                                     optimizer_centerloss, criterion_softmax,
                                                                     criterion_centerloss, device)
            save_model(net, criterion_centerloss, epoch, os.path.join(args.checkpoint, 'last_model.pth'))
            # plot the training data
            if args.plot:
                plot_feature(net, criterion_centerloss, trainloader, device, args.plotfolder, epoch=epoch,
                             plot_class_num=args.train_class_num, maximum=args.plot_max, plot_quality=args.plot_quality)

            logger.append([epoch + 1, train_loss, softmax_loss, center_loss, train_acc])
            scheduler.step()
            test(net, testloader, device)

    if args.plot:
        plot_feature(net, criterion_centerloss, testloader, device, args.plotfolder, epoch="test",
                     plot_class_num=args.train_class_num+1, maximum=args.plot_max, plot_quality=args.plot_quality)
    logger.close()
Example #10
def main():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    # checkpoint
    args.checkpoint = './checkpoints/mnist/' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # folder to save figures
    args.plotfolder = './checkpoints/mnist/' + args.arch + '/plotter'
    if not os.path.isdir(args.plotfolder):
        mkdir_p(args.plotfolder)

    # Data
    print('==> Preparing data..')
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    trainset = MNIST(root='../../data',
                     train=True,
                     download=True,
                     transform=transform,
                     train_class_num=args.train_class_num,
                     test_class_num=args.test_class_num,
                     includes_all_train_class=args.includes_all_train_class)
    testset = MNIST(root='../../data',
                    train=False,
                    download=True,
                    transform=transform,
                    train_class_num=args.train_class_num,
                    test_class_num=args.test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
    # data loader
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.bs,
                                              shuffle=True,
                                              num_workers=4)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.bs,
                                             shuffle=False,
                                             num_workers=4)

    # Model
    net = Network(backbone=args.arch,
                  num_classes=args.train_class_num,
                  embed_dim=args.embed_dim)
    fea_dim = net.classifier.in_features
    net = net.to(device)

    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            # best_acc = checkpoint['acc']
            # print("BEST_ACCURACY: "+str(best_acc))
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names([
            'Epoch', 'Learning Rate', 'Train Loss', 'Train Acc.', 'Test Loss',
            'Test Acc.'
        ])

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=5e-4)

    # test(0, net, trainloader, testloader, criterion, device)
    epoch = 0
    if not args.evaluate:
        for epoch in range(start_epoch, args.es):
            print('\nEpoch: %d   Learning rate: %f' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            adjust_learning_rate(optimizer, epoch, args.lr, step=20)
            train_loss, train_acc = train(net, trainloader, optimizer,
                                          criterion, device)
            save_model(net, None, epoch,
                       os.path.join(args.checkpoint, 'last_model.pth'))
            test_loss, test_acc = 0, 0
            #
            logger.append([
                epoch + 1, optimizer.param_groups[0]['lr'], train_loss,
                train_acc, test_loss, test_acc
            ])
            plot_feature(net,
                         trainloader,
                         device,
                         args.plotfolder,
                         epoch=epoch,
                         plot_class_num=args.train_class_num,
                         maximum=args.plot_max,
                         plot_quality=args.plot_quality)
            test(epoch, net, trainloader, testloader, criterion, device)

    test(99999, net, trainloader, testloader, criterion, device)
    plot_feature(net,
                 testloader,
                 device,
                 args.plotfolder,
                 epoch="test",
                 plot_class_num=args.train_class_num + 1,
                 maximum=args.plot_max,
                 plot_quality=args.plot_quality)
    logger.close()
Example #11
                    metavar='PATH',
                    help='path to latest checkpoint')

# Parameters for plotting
parser.add_argument(
    '--plot_max',
    default=0,
    type=int,
    help='max examples to plot in each class, 0 indicates all.')

args = parser.parse_args()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
args.plotter = './checkpoints/mnist/' + args.arch + '/plotter_%s_%s' % (
    args.alpha, args.beta)
if not os.path.isdir(args.plotter):
    mkdir_p(args.plotter)

print('==> Preparing data..')
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307, ), (0.3081, ))])

trainset = MNIST(root='../../data',
                 train=True,
                 download=True,
                 transform=transform,
                 train_class_num=args.train_class_num,
                 test_class_num=args.test_class_num,
                 includes_all_train_class=args.includes_all_train_class)

testset = MNIST(root='../../data',
Example #12
    def __init__(self, zone, config):

        # Initialises the source/destination of the log messages
        self.transaction            = config['analysis']['transaction']
        self.maxtransactionduration = config['analysis']['maxtransactionduration']  # if there is no end event after this period, the transaction is considered failed
        self.data_source            = config['rabbitmq_input']['type']
        self.save_all_logs          = config['analysis']['save_all_logs']
        self.save_capsules          = config['analysis']['save_capsules']
        self.encapsulate_dest_path  = os.path.join(zone, config['encalupated_out']['dir'])
        self.startevent             = 'compute.instance.create.start'
        self.endevent               = 'compute.instance.create.end'
        self.VMidList               = []
        self.ReqidList              = []
        self.TokenidList            = []
        self.GlobalidList           = []
        self.token2reqidDic = collections.defaultdict(dict)
        self.token2insidDic = collections.defaultdict(dict)
        self.token2starttimeDic = collections.defaultdict(dict)
        self.actiondfColList = ['transaction', 'token', 'request_id', 'instance_id', 'global_id', 'project_id', 'tenant_id',
                           'user_id', 'transaction_tstart', 'transaction_tend']
        self.actionstartdf = pd.DataFrame(columns=self.actiondfColList)
        self.cerror = -1  # counter for the number of exception errors that happened (can be removed later)
        self.rowcount = -1  # counter for tracking the number of records/logs/lines (can be removed later)
        self.transaction_aggregated_df = pd.DataFrame()  # dataframe storing the required information about observed VMs that have been launched

        self.debug_file_flag = int(config['debug']['debug_file_flag'])
        self.debug_sherlock_flag = int(config['debug']['debug_sherlock_flag'])
        if self.debug_sherlock_flag:
            self.debug_sherlock_file = config['debug']['debug_sherlock_file']

        self.timeinterval = config['analysis']['cache_flush_interval']
        if  self.debug_file_flag == 1:
            self.current_interval_tick = np.datetime64(pd.to_datetime(str(config['debug']['startingdate'])  +'-' +str(config['debug']['startingtime'])))
        else:
            self.current_interval_tick = np.datetime64(pd.datetime.now())
        self.previous_interval_tick = self.current_interval_tick - np.timedelta64(self.timeinterval, 's')
        self.future_interval_tick = self.current_interval_tick + np.timedelta64(self.timeinterval, 's')
        self.interval_tickList = [self.previous_interval_tick, self.current_interval_tick, self.future_interval_tick]
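        # sliding window of [previous, current, future] ticks, advanced every
        # cache_flush_interval seconds by the timer started below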
        threading.Timer(1.0*int(self.timeinterval), self.update_current_tick_and_more).start()   # Timer that calls "update_current_tick_and_more" every "self.timeinterval" seconds

        mkdir_p(self.encapsulate_dest_path)
        self.n_mprocessing          = config['processing']['ncores_per_zone']
        self.part = 0  # counter for the number of file parts

        rep_tokens      = {'\\': '', '"{': '{', '}"': '}', '\'{': '{', '}\'': '}', '{}': '"NA"'}
        self.rep_tokens = dict((re.escape(k), v) for k, v in rep_tokens.items())
        self.pattern_tokens = re.compile("|".join(self.rep_tokens.keys()))

        rep_for_exchange            = r'"oslo.message"'
        self.pattern_for_exchange   = re.compile(rep_for_exchange)

        if self.data_source == 'file':
            self.files_to_load = None
            self.data_source_path = config['rabbitmq_input']['file_parameters']['dir']
        elif self.data_source == 'stream':
            self.flush_sherlock_msglist_interval = config['rabbitmq_input']['stream_parameters']['flush_sherlock_msglist_interval']
            LOG.debug("starting the timer " + str(self.flush_sherlock_msglist_interval))
            threading.Timer(self.flush_sherlock_msglist_interval, self.sherlock_retrive_df_process_frame).start()
            if self.debug_sherlock_flag == 0:
                self.sherlock_listener = SherlockListener(config)
                self.sherlock_listener.start_listener()
                # t1 = threading.Thread(target=self.sherlock_listener.start_listener)
                # t1.start()
                # print "joining thread"
                # t1.join(0)
            else:
                self.msg_list = []
                self.load_and_process_new_logs(filename='./sherlock')
        else:
            raise ValueError("Data source other than 'file' or 'stream' is not implemented")

        if config['debug']['debug_file_flag'] == 1:
            self.current_interval_tick = np.datetime64(pd.to_datetime(str(config['debug']['startingdate'])  +'-' +str(config['debug']['startingtime'])))
        else:
            self.current_interval_tick = np.datetime64(pd.datetime.now())

        LOG.debug("Completed initialization ")