Exemplo n.º 1
0
def dump_model():
    """Dump the weights of a ResNet-18 checkpoint into one raw binary file.

    Builds ``resnet.ResNet(resnet.BasicBlock, [2, 2, 2, 2])``, loads
    ``checkpoint['state_dict']`` from the hard-coded checkpoint path and
    writes every tensor back to back with ``ndarray.tofile``.

    Write order: stem conv weight, stem BN, every block of layer1..layer4
    (conv1 weight, bn1, conv2 weight, bn2, then the downsample conv weight and
    BN when the block has a downsample branch), and finally the fc weight and
    bias. Each BN contributes running_mean, running_var, weight, bias.
    """
    def get_bn_params(bn):
        # Order in which the four BN tensors are written.
        return [bn.running_mean, bn.running_var, bn.weight.data, bn.bias.data]

    def get_downsample_params(block):
        # Blocks without a downsample branch contribute nothing here.
        if not block.downsample:
            return []
        shortcut = block.downsample._modules
        return [shortcut['0'].weight.data] + get_bn_params(shortcut['1'])

    def get_block_params(block):
        params = [block.conv1.weight.data] + get_bn_params(block.bn1)
        params += [block.conv2.weight.data] + get_bn_params(block.bn2)
        return params + get_downsample_params(block)

    def get_BN_block_params(block):
        # Three-conv (bottleneck) variant; not used for the BasicBlock model
        # built below, kept for dumping bottleneck-based networks.
        params = [block.conv1.weight.data] + get_bn_params(block.bn1)
        params += [block.conv2.weight.data] + get_bn_params(block.bn2)
        params += [block.conv3.weight.data] + get_bn_params(block.bn3)
        return params + get_downsample_params(block)

    def get_layer_params(layer):
        params = []
        for block in layer._modules.values():
            params += get_block_params(block)
        return params

    model = resnet.ResNet(resnet.BasicBlock, [2, 2, 2, 2])
    # model.load('resnet18\\mergedresnet18.sd')
    # map_location='cpu': the dump is pure CPU/NumPy work, so the checkpoint
    # must load even when it was saved from a GPU and no GPU is present.
    checkpoint = torch.load('resnet18\\checkpoint_blu35103.pth.tar',
                            map_location='cpu')
    #model.load_state_dict(moduledict_to_dict(checkpoint['state_dict']))
    model.load_state_dict(checkpoint['state_dict'])
    #model.quantize_from('quant_param89.data')

    # Original note on the stem conv weight shape: (64,3,7,7)
    params = [model.conv1.weight.data] + get_bn_params(model.bn1)
    for layer in (model.layer1, model.layer2, model.layer3, model.layer4):
        params += get_layer_params(layer)
    params += [model.fc.weight.data, model.fc.bias.data]

    with open('checkpoint_blu35103.data', 'wb') as f:
        for para_tensor in params:
            para_tensor.numpy().tofile(f)
    return
Exemplo n.º 2
0
def main():
    """Trace a ResNet-152 checkpoint and save it as a TorchScript module.

    Loads ``resnet152_best-148.ckpt`` into
    ``resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3])``, traces the model in
    eval mode with a random 1x3x32x32 input and writes ``script_module.pt``.
    Runs on ``cuda:0`` when CUDA is available and on the CPU otherwise.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = resnet.ResNet(resnet.Bottleneck, [3,8,36,3])
    model.load_state_dict(torch.load('resnet152_best-148.ckpt', map_location=device))

    # Only used by the disabled single-image test sketched below.
    loader = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

    # Follow the selected device instead of calling .cuda() unconditionally,
    # which crashed on hosts without CUDA despite the CPU fallback above.
    model.to(device)
    model.eval()

    with torch.no_grad():
        # ResNet152 Python Pytorch Test (disabled):
        #
        #   img_path = "9/" + i
        #
        #   image = Image.open(img_path)
        #   image = image.convert('RGB')
        #
        #   image_tensor = loader(image).unsqueeze(0)
        #   image_numpy = image_tensor.numpy()
        #
        #   image_tensor = image_tensor.cuda()
        #
        #   output = model(image_tensor)
        #   _, predict = torch.max(output.data, 1)
        #   print("predicted : ", predict, i)

        # Save Script Module
        example = torch.rand(1,3,32,32).to(device)
        traced_script_module = torch.jit.trace(model, example)
        traced_script_module.save('script_module.pt')
Exemplo n.º 3
0
def conv_validation():
    """Report top-1 / top-5 accuracy of the quantized ResNet-18 on ImageNet val.

    Loads the hard-coded checkpoint, applies ``model.quantize`` and
    ``model.load_blu``, then runs the validation folder in batches of 10.
    Prints the accuracy of each batch and, at the end, the mean of the
    per-batch accuracies.
    """
    #model = resnet.__dict__['resnet18'](pretrained=True)
    model = resnet.ResNet(resnet.BasicBlock, [2, 2, 2, 2])
    #model.load('resnet18\\mergedresnet18.sd')
    checkpoint = torch.load('resnet18\\checkpoint_renorm89.pth.tar')
    model.load_state_dict(moduledict_to_dict(checkpoint['state_dict']))
    #model.load_state_dict(checkpoint['state_dict'])
    model.quantize('quant_param89.data')
    model.load_blu('3sigma.blu')
    # Inference mode: batch-norm / dropout layers must use their stored
    # statistics rather than per-batch behaviour while measuring accuracy.
    model.eval()

    #normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
    normalize = transforms.Normalize(mean=[128/255,128/255,128/255], std=[0.226, 0.226, 0.226])
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder('..\\data\\imagenet\\val', transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=10, shuffle=False,
        num_workers=4, pin_memory=True)

    acc1_t = torch.tensor([0.0])
    acc5_t = torch.tensor([0.0])
    # No gradients are needed for validation; skipping autograd bookkeeping
    # saves memory and time.
    with torch.no_grad():
        # `images` instead of `input` so the builtin is not shadowed.
        for i, (images, target) in enumerate(val_loader):
            #images=(images-0.5)/0.226
            output = model(images)
            #output=model.forward_blu(images)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            print('{}:acc1:{},acc5:{}'.format(i,acc1,acc5))
            acc1_t += acc1
            acc5_t += acc5
    print('acc1:{}\nacc5:{}'.format(acc1_t/(i+1), acc5_t/(i+1)))
Exemplo n.º 4
0
def unittest():
    """Smoke test: build the ResNet graph and initialise its variables.

    Opens a TF1 session with GPU memory growth enabled, constructs
    ``resnet.ResNet(args)`` and runs the global variable initializer.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory on demand
    session = tf.Session(config=config)

    model = resnet.ResNet(args)
    init_op = tf.global_variables_initializer()
    session.run(init_op)
Exemplo n.º 5
0
  def __init__( self, T, C ):
    """Create the input placeholders, hyper-parameters and the base ResNet.

    ``T`` and ``C`` are stored unchanged on the instance. ``T`` is also the
    size of the second dimension of the ``X``, ``Y`` and ``P`` placeholders.
    """
    self.T = T
    self.C = C

    # Inputs and integer labels.
    self.X = tf.placeholder(tf.float32, [None, self.T, 224, 224, 3])
    self.Y = tf.placeholder(tf.int32, [None, self.T])

    # Joint annotations: 18 joints (original note: using CMU Openpose).
    self.J = 18
    self.P = tf.placeholder(tf.float32, [None, self.T, 7, 7, self.J])

    # Scalars fed at run time.
    self.LR = tf.placeholder(tf.float32)          # learning rate
    self.lambda_l2 = tf.placeholder(tf.float32)   # regularization factor
    self.phase = tf.placeholder(tf.bool)          # training phase flag

    # Dynamic batch size and batch * T.
    self.BATCH = tf.shape(self.X)[0]
    self.BT = self.BATCH * self.T
    self.scope = "Model"  # original note: train only variables in this scope

    # Weights stored for the action and pose terms.
    self.l_action = 1.0
    self.l_pose = 1.0

    self.DIM_LSTM = 512  # dimensionality of the LSTM
    self.DIM_ATT = 32    # original note: 32 (Sub-JHMDB) or 128 (PennAction)

    # Base network; swap this line to use a different backbone.
    self.net = resnet.ResNet()
    self.net.phase = self.phase
Exemplo n.º 6
0
def train():
    """Train the ResNet from ``args.beg_epoch`` up to ``args.epochs``.

    After every epoch the model is checkpointed under ``args.model_dir``, the
    training accuracy and loss are reported, and ``run_test`` is run and its
    result reported as well.
    """
    def report(message):
        # Every summary line goes to both the logger and stdout.
        logger.warning(message)
        print(message)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    model = resnet.ResNet(args)
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver(max_to_keep=100)
    model.load_model(sess, saver)
    logger.info('model loaded')

    dtrain = dataset.DataSet({'data_dir': args.train_dir})
    logger.info('dataset loaded')

    for epoch in range(args.beg_epoch, args.epochs):
        train_acc, train_loss = run_epoch(epoch, model, sess, dtrain)

        checkpoint_path = os.path.join(args.model_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=epoch)
        logger.info('model of epoch {} saved'.format(epoch))

        report('[epoch {}]train-acc:{:.3} train-loss:{:.3}'.format(
            epoch, train_acc, train_loss))

        # Evaluation currently runs after every epoch; the earlier
        # "every 10 epochs" condition is disabled.
        valid_acc, valid_time = run_test(epoch, model, sess, 10, 20)
        report('[epoch {}]test-acc:{:.3} test-time:{}'.format(
            epoch, valid_acc, valid_time))

    logger.info('train end')
Exemplo n.º 7
0
def netMaker(args):
    '''Build a network
    Args:
        args (Namespace): the namespace containing all the arguments required for training and building the network
    Returns:
        the built network
    Raises:
        ValueError: if args.dataset or args.model is not a known name
    '''

    # dataset name -> (input channels, number of classes)
    dataset_specs = {
        "MNIST": (1, 10),
        "FAKENIST": (1, 10),
        "CIFAR10": (3, 10),
        "IMAGENET": (3, 1000),
    }
    if args.dataset not in dataset_specs:
        raise ValueError("Unknown dataset : {}".format(args.dataset))
    inChan, numClasses = dataset_specs[args.dataset]

    # model name -> keyword arguments that differ between the ResNet variants
    resnet_variants = {
        "cnn": dict(geom=False, strides=[2, 2, 2, 2], conv=True),
        "gnn_resnet_stri": dict(geom=True, strides=[2, 2, 2, 2], multiChan=False, conv=False),
        "gnn_resnet": dict(geom=True, strides=[1, 1, 1, 1], multiChan=False, conv=False),
        "gnn_resnet_mc": dict(geom=True, strides=[1, 1, 1, 1], multiChan=True, conv=False),
        "gcnn_resnet": dict(geom=True, strides=[2, 2, 2, 2], multiChan=False, conv=True),
    }

    if args.model == "gnn":
        return GNN(inChan, args.chan_gnn, args.nb_lay_gnn, numClasses,
                   args.res_con_gnn, args.batch_norm_gnn, args.max_pool_pos,
                   args.max_pool_ker, False)

    if args.model not in resnet_variants:
        raise ValueError("Unknown model type : {}".format(args.model))

    # Everything except the per-variant keywords is shared by all ResNets.
    return resnet.ResNet(resnet.BasicBlock, [1, 1, 1, 1],
                         inChan=inChan, width_per_group=args.dechan,
                         firstConvKer=args.deker, inPlanes=args.dechan,
                         num_classes=numClasses,
                         **resnet_variants[args.model])
Exemplo n.º 8
0
    def load_model(self):
        """Build the network named by ``self.model_type`` and load its weights.

        The weights come from ``self.model_path`` (per the original note: the
        model already trained by train_model.py).

        Returns:
            The network with its state dict loaded, switched to eval mode and
            moved to ``self.device``.

        Raises:
            ValueError: if ``self.model_type`` is not a supported name.
        """
        nb_outputs = ut.nbOutputs(self.label_style, self.environment)

        # Blocks per stage for the Bottleneck-based variants.
        bottleneck_layers = {
            'resnet50': [3, 4, 6, 3],
            'resnet101': [3, 4, 23, 3],
            'resnet152': [3, 8, 36, 3],
        }

        # The "dk_" variants wrap a resnet18 with a reward-function head.
        reward_fn_head = None
        if self.model_type == 'dk_resnet18_CP':
            reward_fn_head = RewardFunctionHeadCartPole()
        elif self.model_type == 'dk_resnet18_CP_weird':
            reward_fn_head = WeirdRewardFunctionHeadCartPole()
        elif self.model_type == 'dk_resnet18_DT':
            reward_fn_head = RewardFunctionHeadDuckieTown()

        if reward_fn_head is not None:
            nb_outputs = 2  # FIXME: hard coded
            net = RewardFunctionHeadModel(
                models.resnet18(pretrained=False, num_classes=nb_outputs),
                reward_fn_head)
        elif self.model_type == 'resnet18':
            net = models.resnet18(pretrained=False, num_classes=nb_outputs)
            #### To use in case want the pretrained model: (remove num_classes as pretrained model only comes with original 1000 classes)
            # dim_feats = net.fc.in_features # =1000
            # net.fc = nn.Linear(dim_feats, nb_outputs)
        elif self.model_type == 'resnet34':
            net = resnet.resnet34(pretrained=False, num_classes=nb_outputs)
        elif self.model_type in bottleneck_layers:
            net = resnet.ResNet(resnet.Bottleneck,
                                bottleneck_layers[self.model_type],
                                num_classes=nb_outputs)
        else:
            # Previously fell through and failed later with an
            # UnboundLocalError on `net`.
            raise ValueError(
                'Unknown model type : {}'.format(self.model_type))

        # map_location: a checkpoint saved on another device (e.g. a GPU)
        # must still load on the device this instance runs on.
        net.load_state_dict(
            torch.load(self.model_path, map_location=self.device))
        print('Loaded model')
        net.eval()
        net = net.to(self.device)
        return net
Exemplo n.º 9
0
 def __init__(self, input_channel,
                 dropout = None, inp = "log-Filterbank-DCT-26-13",
                 nodes = None, num_layers = 2):
     """Wrap a ``resnet.ResNet`` built from ``resnet.ResidualBlock``.

     Args:
         input_channel: forwarded to ``resnet.ResNet`` as its first argument.
         dropout: forwarded to ``resnet.ResNet``.
         inp: forwarded to ``resnet.ResNet``.
         nodes: forwarded to ``resnet.ResNet``; defaults to
             ``[16, 32, 64, 128]`` when omitted.
         num_layers: forwarded to ``resnet.ResNet``.

     ``self.out_features`` mirrors the wrapped model's ``out_features``.
     """
     super(resnet_extractor, self).__init__()

     # A fresh list per instance: a list literal as the default value would be
     # one object shared by every instance created without `nodes`.
     if nodes is None:
         nodes = [16, 32, 64, 128]

     import resnet
     self.model = resnet.ResNet(input_channel, resnet.ResidualBlock, nodes, num_layers,
                                 dropout, inp)
     self.out_features = self.model.out_features
Exemplo n.º 10
0
    def initialize_net(self):
        """Build an untrained network for the architecture named by ``self.model``.

        Returns:
            The network converted with ``.float()`` and moved to
            ``self.device``.

        Raises:
            ValueError: if ``self.model`` is not a supported name.
        """
        nb_outputs = ut.nbOutputs(self.label_style, self.environment)

        # Blocks per stage for the Bottleneck-based variants.
        bottleneck_layers = {
            'resnet50': [3, 4, 6, 3],
            'resnet101': [3, 4, 23, 3],
            'resnet152': [3, 8, 36, 3],
        }

        # The "dk_" variants wrap a resnet18 with a reward-function head.
        reward_fn_head = None
        if self.model == 'dk_resnet18_CP':
            reward_fn_head = RewardFunctionHeadCartPole()
        elif self.model == 'dk_resnet18_CP_weird':
            reward_fn_head = WeirdRewardFunctionHeadCartPole()
        elif self.model == 'dk_resnet18_DT':
            reward_fn_head = RewardFunctionHeadDuckieTown()

        if reward_fn_head is not None:
            nb_outputs = 2  # FIXME: hard coded
            net = RewardFunctionHeadModel(
                models.resnet18(pretrained=False, num_classes=nb_outputs),
                reward_fn_head)
        elif self.model == 'resnet18':
            net = models.resnet18(pretrained=False, num_classes=nb_outputs)
            #### To use in case want the pretrained model: (remove num_classes as pretrained model only comes with original 1000 classes)
            # dim_feats = net.fc.in_features # =1000
            # net.fc = nn.Linear(dim_feats, nb_outputs)
        elif self.model == 'resnet34':
            net = models.resnet34(pretrained=False, num_classes=nb_outputs)
        elif self.model in bottleneck_layers:
            net = resnet.ResNet(resnet.Bottleneck,
                                bottleneck_layers[self.model],
                                num_classes=nb_outputs)
        else:
            # Previously fell through and failed later with an
            # UnboundLocalError on `net`.
            raise ValueError('Unknown model type : {}'.format(self.model))

        net = net.float()
        net = net.to(self.device)
        return net
Exemplo n.º 11
0
def temperature_scaling():
    """Calibrate a trained ResNet with temperature scaling on CIFAR-100.

    Evaluates the saved network on the test set and stores the
    ``before_cali_*`` results, fits ``ModelWithTemperature`` on the validation
    subset, then evaluates again and stores the ``after_cali_*`` results.
    """
    print("Calibration")
    state_dict = torch.load(args.save_path +
                            'resnet110_{0}.pth'.format(args.epoch))
    valid_indices = torch.load(args.save_path + 'valid_indices.pth')

    test_transforms = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=[0.5071, 0.4867, 0.4408],
                                std=[0.2675, 0.2565, 0.2761]),
    ])

    # The validation subset is drawn from the training split via saved indices.
    valid_set = tv.datasets.CIFAR100(root='./cifar100_data/', train=True,
                                     transform=test_transforms, download=False)
    test_set = tv.datasets.CIFAR100(root='./cifar100_data/', train=False,
                                    transform=test_transforms, download=False)

    valid_sampler = SubsetRandomSampler(valid_indices)
    valid_loader = torch.utils.data.DataLoader(valid_set,
                                               batch_size=args.batch_size,
                                               sampler=valid_sampler)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=100,
                                              num_workers=4)
    #valid_loader = valid_loader_pth
    criterion = nn.CrossEntropyLoss()
    network = resnet.ResNet(args.layer, 100).cuda()
    network.load_state_dict(state_dict)

    def evaluate_and_save(net, correct_name, softmax_name):
        # Run the test pass, turn each softmax entry into a Python number in
        # place, then persist the correctness flags and the softmax values.
        _, _, softmax, correct, _, _ = test(
            test_loader, net, criterion, args.epoch + 1, 'test')
        for idx, value in enumerate(softmax):
            softmax[idx] = value.item()
        utlis.save_data(correct_name, correct, args.save_path)
        utlis.save_data(softmax_name, softmax, args.save_path)

    evaluate_and_save(network, 'before_cali_correct', 'before_cali_softmax')

    model = ModelWithTemperature(network)
    model.set_temperature(valid_loader)

    evaluate_and_save(model, 'after_cali_correct', 'after_cali_softmax')
Exemplo n.º 12
0
    def __init__(self, is_train=True):
        """Create the CUDA ResNet and, when training, its optimizer and losses.

        Args:
            is_train: when false only the network is created; no optimizer or
                loss functions are attached.
        """
        net = resnet.ResNet()
        net.cuda()
        self.resnet = net

        if not is_train:
            return

        self.optimizer = torch.optim.SGD(
            net.parameters(),
            lr=C.LEARNING_RATE,
            momentum=0.9,
            weight_decay=1E-4,
        )
        self.policy_loss_fn = MultiLableCrossEntropy()
        self.value_loss_fn = torch.nn.MSELoss()
Exemplo n.º 13
0
def test():
    """Smoke-test a ResNet-152 forward pass and print the output size.

    Uses ``cuda:0`` when CUDA is available and the CPU otherwise, and feeds a
    single random 3x32x32 input through the network in eval mode.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')

    net = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3]).to(device)
    net.eval()

    with torch.no_grad():
        dummy_input = torch.randn(1, 3, 32, 32).to(device)
        logits = net(dummy_input)
        print(logits.size())
Exemplo n.º 14
0
def main(root_image, root_model):
    """Classify one image with a ResNet-152 checkpoint and print the result.

    Args:
        root_image: path of the image to classify.
        root_model: path of the state-dict checkpoint to load.

    The class name is taken from the sorted directory listing of
    ``./dataset/img/``; the printed probability is the softmax score of the
    predicted class, in percent rounded to two decimals.
    """
    print("\nInput image ==> ", root_image)
    print("Current model ===> ", root_model)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Run ", device)

    ## ResNet-50 : [3, 4, 6, 3]
    ## ResNet-101 : [3, 4, 23, 3]
    ## ResNet-152 : [3, 8, 36, 3]

    model = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3])
    # map_location: without it a checkpoint saved on a GPU cannot be loaded
    # on a CPU-only host, defeating the device fallback above.
    model.load_state_dict(torch.load(root_model, map_location=device))
    model.to(device)

    # Input Image Transforms
    loader = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        # transforms.Normalize([0.5],[0.5]),
    ])

    model.eval()
    with torch.no_grad():
        # label list : 67 class
        label = os.listdir("./dataset/img/")
        label.sort()

        # to RGB
        image = Image.open(root_image)
        image = image.convert("RGB")

        # to Tensor, then onto the selected device
        image_tensor = loader(image).unsqueeze(0)
        image_tensor = image_tensor.to(device)
        output = model(image_tensor)

        # Predict
        _, predicted = torch.max(output.data, 1)

        # Calc Probability
        s_max = torch.nn.Softmax(dim=0)
        prob = s_max(output.squeeze(0))
        prob_np = prob.cpu().numpy()

        index = int(predicted.item())

        predicted_name = label[index]
        probability = round(prob_np[index] * 100, 2)

        print("Predicted ===> name : {},  probability : {}".format(
            predicted_name, probability))
        '''
Exemplo n.º 15
0
    def __init__(self, trained_dataset='imagenet', depth=101, num_of_class=1000, path=None):
        """Build a DataParallel ``RN.ResNet`` on CUDA, optionally from a checkpoint.

        Args:
            trained_dataset, depth, num_of_class: forwarded to ``RN.ResNet``.
            path: optional checkpoint file; when given, its ``'state_dict'``
                entry is loaded and the ``avgpool`` and ``fc`` modules are
                replaced by ``Identity``.
        """
        backbone = RN.ResNet(trained_dataset, depth, num_of_class)
        model = torch.nn.DataParallel(backbone).cuda()

        if path is not None:
            print("=> loading checkpoint '{}'".format(path))
            state = torch.load(path)['state_dict']
            model.load_state_dict(state)
            print("=> loaded checkpoint '{}'".format(path))
            # NOTE(review): the heads are only stripped when a checkpoint is
            # given - confirm that this is intended.
            for head_name in ('avgpool', 'fc'):
                setattr(model.module, head_name, Identity())

        self.model = model
Exemplo n.º 16
0
 def _build_model(self):
     """Create and compile the network used for Deep-Q learning.

     The model is ``resnet.ResNet(self.action_size)`` built for
     ``(None, 16, 16, 3)`` inputs and compiled with the instance's Huber loss
     and an Adam optimizer at ``self.learning_rate``.

     Earlier variants (a small Dense stack, and ``resnet.resnet18`` built for
     32x32x3 inputs) were compiled with the same loss and optimizer.
     """
     q_network = resnet.ResNet(self.action_size)
     q_network.build(input_shape=(None, 16, 16, 3))

     optimizer = Adam(lr=self.learning_rate)
     q_network.compile(loss=self._huber_loss, optimizer=optimizer)
     return q_network
Exemplo n.º 17
0
def test():
    """Evaluate the saved checkpoint of every epoch below ``args.beg_epoch``.

    For each epoch, ``args.model_path`` is pointed at ``model.ckpt-<epoch>``
    under ``args.model_dir`` before ``model.load_model`` is called
    (presumably ``load_model`` reads that path - verify against its
    implementation). ``run_test`` is then run and its accuracy and time are
    logged and printed.
    """
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    model = resnet.ResNet(args)
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver(max_to_keep=100)

    for epoch in range(0, args.beg_epoch):
        args.model_path = os.path.join(args.model_dir, 'model.ckpt-{}'.format(epoch))
        model.load_model(sess, saver)
        valid_acc,valid_time = run_test(epoch, model, sess, 10, 20)
        s = '[epoch {}]test-acc:{:.3} test-time:{}'.format(epoch, valid_acc, valid_time)
        logger.warning(s)
        print(s)

    # A bare `return` used to sit in front of this line and made it
    # unreachable.
    logger.info('test end')
Exemplo n.º 18
0
 def __init__(self, args):
     """Create the ResNet feature layers and the recognition module.

     Args:
         args: forwarded unchanged to ``recog.Recogniton``.
     """
     super(Model, self).__init__()
     backbone = resnet.ResNet(31)
     recognizer = recog.Recogniton(args)
     self.conv_layer = backbone
     self.recg = recognizer
Exemplo n.º 19
0
def main():
    """Entry point: build the model and data loaders, then test or train.

    Reads the word-index and image-label JSON files, builds a DenseNet121 or
    ResNet according to ``args.model`` (returning early for any other value),
    optionally restores a checkpoint, and splits the training files into a
    training part and a 2048-file validation part.

    For ``args.phase`` in ``'test'``, ``'val'`` or ``'train_val'`` the matching
    ``test(...)`` calls are made and the function returns. For ``'train'`` the
    training, validation and pretraining loaders are built and the epoch loop
    runs ``train_eval`` on the first training loader and on the validation
    loader.

    NOTE(review): the visible part of this function ends at the ``continue``
    that closes the epoch loop; what follows is cut off in this excerpt.
    """
    word_index_dict = json.load(open(args.word_index_json,
                                     encoding="utf-8"))  # one index per Chinese character
    num_classes = len(word_index_dict)
    # xzy: load the label file
    image_label_dict = json.load(open(args.image_label_json))  # each image maps to the indices of its labels
    # print(image_label_dict)
    cudnn.benchmark = True

    # xzy: build the model
    if args.model == 'densenet':
        # a bit over two thousand character classes, multi-label classification
        if use_gpu:
            model = DenseNet121(num_classes).cuda()
        else:
            model = DenseNet121(num_classes)

    elif args.model == 'resnet':
        # resnet is mainly used for text-region segmentation and object detection
        if use_gpu:
            model = resnet.ResNet(num_classes=num_classes, args=args).cuda()
        else:
            model = resnet.ResNet(num_classes=num_classes, args=args)
    else:
        return
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # model = torch.nn.DataParallel(model).cuda()
    if use_gpu:
        loss = Loss().cuda()
    else:
        loss = Loss()

    # if args.resume:
    if need_resume:
        print("let's begin resume")
        # state_dict = torch.load(args.resume)
        state_dict = torch.load(resume_ckpt_path)  # xzy
        model.load_state_dict(state_dict['state_dict'])  # xzy: resumed training and testing both go through here
        best_f1score = state_dict['f1score']
        start_epoch = state_dict['epoch'] + 1
        print("already resume " + str(resume_ckpt_path.split("/")[-1]))
    else:
        print("no resume")
        best_f1score = 0
        if args.model == 'resnet':
            start_epoch = 100
        else:
            start_epoch = 1
    args.epoch = start_epoch
    print('best_f1score', best_f1score)

    # xzy: load and split the dataset
    # test_filelist = sorted(glob(os.path.join(args.data_dir,'test','*')))
    # trainval_filelist = sorted(glob(os.path.join(args.data_dir,'train','*')))
    test_filelist = sorted(glob(os.path.join(args.data_test_dir, '*')))
    trainval_filelist = sorted(glob(os.path.join(args.data_dir, '*')))
    print("训练数据集长度是" + str(len(trainval_filelist)))
    print("测试数据集长度是" + str(len(test_filelist)))

    # Training with two input sizes (original design):
    # train_filelist1: images with aspect ratio below 8:1, padded to a 64*512 input
    # train_filelist2: images with aspect ratio above 8:1, padded and cropped to a 64*1024 input
    train_filelist1, train_filelist2 = [], []

    # Blacklist: images whose labels are known to be wrong
    black_list = set(json.load(open(args.black_json))['black_list'])
    # image_hw_ratio_dict = json.load(open(args.image_hw_ratio_json))   # xzy
    for f in trainval_filelist:
        image = f.split('/')[-1]
        if image in black_list:
            continue
        # r = image_hw_ratio_dict[image]
        # if r == 0:
        if True:  # xzy: the 1:8 ratio split (with different resizes) is no longer applied
            train_filelist1.append(f)
        else:
            # Unreachable while the condition above is hard-coded to True,
            # so train_filelist2 always stays empty.
            train_filelist2.append(f)

    # train_val_filelist = train_filelist1 + train_filelist2
    train_val_filelist = train_filelist1
    # The last 2048 files form the validation split; the rest is used for training.
    val_filelist = train_filelist1[-2048:]
    train_filelist1 = train_filelist1[:-2048]

    # train_filelist2 = train_filelist2   (the train_dataset2 data set is disabled)
    # image_size = [512, 64]    16x and 2x of 32
    image_size = [288, 64]  # xzy: 9x and 2x of 32

    if args.phase in ['test', 'val', 'train_val']:
        # Test: output the text detection results
        test_dataset = dataloader.DataSet(
            test_filelist,
            image_label_dict,
            num_classes,
            # transform=train_transform,
            args=args,
            image_size=image_size,
            phase='test')
        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=8,
                                 pin_memory=True)
        # Evaluate on the last 2048 files of the remaining training split.
        train_filelist = train_filelist1[-2048:]
        train_dataset = dataloader.DataSet(train_filelist,
                                           image_label_dict,
                                           num_classes,
                                           image_size=image_size,
                                           args=args,
                                           phase='test')
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=8,
                                  pin_memory=True)

        val_dataset = dataloader.DataSet(val_filelist,
                                         image_label_dict,
                                         num_classes,
                                         image_size=image_size,
                                         args=args,
                                         phase='test')
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=1,
                                shuffle=False,
                                num_workers=8,
                                pin_memory=True)

        train_val_dataset = dataloader.DataSet(train_val_filelist,
                                               image_label_dict,
                                               num_classes,
                                               image_size=image_size,
                                               args=args,
                                               phase='test')
        train_val_loader = DataLoader(dataset=train_val_dataset,
                                      batch_size=1,
                                      shuffle=False,
                                      num_workers=8,
                                      pin_memory=True)

        if args.phase == 'test':
            print("now , let's do phase of test")
            test(start_epoch - 1, model, val_loader, 'val')
            test(start_epoch - 1, model, test_loader, 'test')
            # test(start_epoch - 1, model, train_val_loader, 'train_val')
        elif args.phase == 'val':
            test(start_epoch - 1, model, train_loader, 'train')
            test(start_epoch - 1, model, val_loader, 'val')
        elif args.phase == 'train_val':
            test(start_epoch - 1, model, train_val_loader, 'train_val')
        return

    elif args.phase == 'train':
        # print(train_filelist1[:10])
        train_dataset1 = dataloader.DataSet(
            train_filelist1,
            image_label_dict,
            num_classes,
            image_size=image_size,  # previously image_size = [512, 64]
            args=args,
            phase='train')
        train_loader1 = DataLoader(dataset=train_dataset1,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=8,
                                   pin_memory=True)
        # NOTE(review): train_filelist2 is always empty (see the split loop
        # above) and train_loader2 is not used by the visible epoch loop.
        train_dataset2 = dataloader.DataSet(train_filelist2,
                                            image_label_dict,
                                            num_classes,
                                            image_size=(1024, 64),
                                            args=args,
                                            phase='train')
        train_loader2 = DataLoader(dataset=train_dataset2,
                                   batch_size=int(args.batch_size / 2),
                                   shuffle=True,
                                   num_workers=8,
                                   pin_memory=True)
        val_dataset = dataloader.DataSet(val_filelist,
                                         image_label_dict,
                                         num_classes,
                                         image_size=image_size,
                                         args=args,
                                         phase='val')
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=min(8, args.batch_size),
                                shuffle=False,
                                num_workers=8,
                                pin_memory=True)
        # Files under args.bg_dir feed the two 'pretrain' datasets below.
        filelist = glob(os.path.join(args.bg_dir, '*'))
        pretrain_dataset1 = dataloader.DataSet(filelist,
                                               image_label_dict,
                                               num_classes,
                                               image_size=args.image_size,
                                               word_index_dict=word_index_dict,
                                               args=args,
                                               font_range=[8, 32],
                                               margin=10,
                                               rotate_range=[-10., 10.],
                                               phase='pretrain')
        pretrain_loader1 = DataLoader(dataset=pretrain_dataset1,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=8,
                                      pin_memory=True)
        pretrain_dataset2 = dataloader.DataSet(filelist,
                                               image_label_dict,
                                               num_classes,
                                               image_size=(256, 128),
                                               word_index_dict=word_index_dict,
                                               args=args,
                                               font_range=[24, 64],
                                               margin=20,
                                               rotate_range=[-20., 20.],
                                               phase='pretrain')
        pretrain_loader2 = DataLoader(dataset=pretrain_dataset2,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=8,
                                      pin_memory=True)

        # NOTE(review): this resets best_f1score even when a checkpoint was
        # resumed above - confirm that discarding the restored score is intended.
        best_f1score = 0
        # eval_mode = 'pretrain-2'
        eval_mode = 'eval'
        for epoch in range(start_epoch, args.epochs):

            args.epoch = epoch

            if eval_mode == 'eval':
                # Once the best F1 score exceeds 0.9, set the learning rate to
                # 0.0001 and switch hard mining on.
                if best_f1score > 0.9:
                    args.lr = 0.0001
                if best_f1score > 0.9:
                    args.hard_mining = 1

            # Push the (possibly updated) learning rate into the optimizer.
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

            train_eval(epoch, model, train_loader1, loss, optimizer, 2.,
                       'train-1')
            # if best_f1score > 0.9:
            #     train_eval(epoch, model, train_loader2, loss, optimizer, 2., 'train-2')  # train_dataset2 is disabled
            best_f1score = train_eval(
                epoch, model, val_loader, loss, optimizer, best_f1score,
                'eval-{:d}-{:d}'.format(args.batch_size, args.hard_mining))
            continue
            '''
Exemplo n.º 20
0
def main():
    """Train a CIFAR classifier and track confusion between two chosen classes.

    Command-line options are read from the module-level ``parser`` into the
    global ``args``. For CIFAR-10 the training set is drawn through a
    ``WeightedRandomSampler`` in which the two classes ``args.first`` and
    ``args.second`` get sampling weight ``args.weight`` while every other
    class keeps weight 1.0. After each epoch the confusion between those two
    classes is logged via ``log_print``; the per-epoch records accumulated in
    ``global_epoch_confusion`` are saved under ``runs/<expname>/`` at the end.

    With ``args.checkmodel`` set, only the confusion of the (optionally
    pre-trained) model is reported and the process exits.

    Raises:
        Exception: if ``args.dataset`` or ``args.net_type`` is not supported.
    """
    global args, best_err1, best_err5, global_epoch_confusion, best_loss
    # Local import: only needed for the early exit in --checkmodel mode.
    import sys

    args = parser.parse_args()

    def log_pair_confusion():
        # Log the most recent confusion entry in both directions
        # (first -> second, then second -> first).
        for src, dst in ((args.first, args.second),
                         (args.second, args.first)):
            log_print(str(src) + " -> " + str(dst))
            log_print(global_epoch_confusion[-1]["confusion"][(src, dst)])

    if args.dataset.startswith('cifar'):
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])

        transform_test = transforms.Compose([transforms.ToTensor(), normalize])

        if args.dataset == 'cifar100':
            train_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('../data',
                                  train=True,
                                  download=True,
                                  transform=transform_train),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('../data',
                                  train=False,
                                  transform=transform_test),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            numberofclass = 100
        elif args.dataset == 'cifar10':
            train_data = datasets.CIFAR10('../data',
                                          train=True,
                                          download=True,
                                          transform=transform_train)
            print(train_data.targets[:30])
            print(len(train_data))

            # Per-class sampling weight: the two classes under study are
            # re-weighted, all other classes keep weight 1.0.
            class_weights = [1.0] * 10
            class_weights[args.first] = args.weight
            class_weights[args.second] = args.weight
            print(class_weights)

            # One weight per training sample, looked up from its label.
            weights = [class_weights[target] for target in train_data.targets]
            sampler = WeightedRandomSampler(torch.DoubleTensor(weights),
                                            len(train_data))
            # shuffle must stay False: the sampler decides the sample order.
            train_loader = torch.utils.data.DataLoader(
                train_data,
                batch_size=args.batch_size,
                shuffle=False,
                num_workers=args.workers,
                pin_memory=True,
                sampler=sampler)
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data',
                                 train=False,
                                 transform=transform_test),
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            numberofclass = 10

        else:
            raise Exception('unknown dataset: {}'.format(args.dataset))
    else:
        raise Exception('unknown dataset: {}'.format(args.dataset))

    print("=> creating model '{}'".format(args.net_type))
    if args.net_type == 'resnet':
        model = RN.ResNet(args.dataset, args.depth, numberofclass,
                          args.bottleneck)  # for ResNet
    elif args.net_type == 'pyramidnet':
        model = PYRM.PyramidNet(args.dataset, args.depth, args.alpha,
                                numberofclass, args.bottleneck)
    else:
        raise Exception('unknown network architecture: {}'.format(
            args.net_type))

    model = torch.nn.DataParallel(model).cuda()

    # Optionally start from a pre-trained checkpoint.
    if os.path.isfile(args.pretrained):
        print("=> loading checkpoint '{}'".format(args.pretrained))
        checkpoint = torch.load(args.pretrained)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(args.pretrained))

    print('the number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    # define loss function (criterion) and optimizer;
    # reduction='none' keeps one loss value per sample.
    criterion = nn.CrossEntropyLoss(reduction='none').cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    cudnn.benchmark = True

    # for checking pre-trained model accuracy and confusion
    if args.checkmodel:
        global_epoch_confusion.append({})
        get_confusion(val_loader, model, criterion)
        log_pair_confusion()
        sys.exit()

    for epoch in range(0, args.epochs):
        global_epoch_confusion.append({})
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        err1, err5, val_loss = validate(val_loader, model, criterion, epoch)

        # Best-model tracking and checkpointing only happen during the last
        # quarter of training (epoch >= 75% of the total epoch count).
        if epoch >= args.epochs * 0.75:
            is_best = err1 <= best_err1
            if is_best:
                best_err1 = err1
                best_err5 = err5

            print('Current best accuracy (top-1 and 5 error):', best_err1,
                  best_err5)
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.net_type,
                    'state_dict': model.state_dict(),
                    'best_err1': best_err1,
                    'best_err5': best_err5,
                    'optimizer': optimizer.state_dict(),
                }, is_best)

        get_confusion(val_loader, model, criterion, epoch)
        log_pair_confusion()

    print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)
    directory = "runs/%s/" % (args.expname)
    if not os.path.exists(directory):
        os.makedirs(directory)
    epoch_confusions = directory + 'epoch_confusion_' + args.expid
    np.save(epoch_confusions, global_epoch_confusion)
    log_print("")

    # output best model accuracy and confusion
    # NOTE(review): no new entry is appended to global_epoch_confusion here,
    # so get_confusion presumably overwrites the last epoch's in-memory
    # record (already saved above) -- confirm against get_confusion.
    repaired_model = directory + 'model_best.pth.tar'
    if os.path.isfile(repaired_model):
        print("=> loading checkpoint '{}'".format(repaired_model))
        checkpoint = torch.load(repaired_model)
        model.load_state_dict(checkpoint['state_dict'])
        get_confusion(val_loader, model, criterion)
        log_pair_confusion()
Exemplo n.º 21
0
def main():
    """Train a ResNet on CIFAR-10 using Chainer's Trainer.

    Options (batch size, epoch count, GPU id, loader processes, output and
    dataset paths, test mode) come from the command line. The first 45000
    samples are used for training and samples 45000-49999 for validation.
    In ``--test`` mode evaluation, snapshots and logging fire every 10
    iterations instead of once per epoch.
    """
    cli = argparse.ArgumentParser(
        description='Learning CIFAR-10 using ResNet')
    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        ('--batchsize', dict(type=int, default=256,
                             help='Leaning minibatch size')),
        ('--epoch', dict(type=int, default=365,
                         help='Number of epochs to train')),
        ('--gpu', dict(type=int, default=-1,
                       help='GPU id (-1 indicates CPU)')),
        ('--loaders', dict(type=int, default=1,
                           help='Number of data loading processes')),
        ('--out', dict(default='./result',
                       help='Path of output directory')),
        ('--path', dict(default='./cifar-10-batches-py',
                        help='Path of dataset files')),
        ('--test', dict(action='store_true')),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    args = cli.parse_args()

    rule = '=========================================='
    print(rule)
    print('Num Minibatch-size: {}'.format(args.batchsize))
    print('Num Epoch: {}'.format(args.epoch))
    print(rule)

    # Model (moved to the selected GPU when one is requested)
    net = resnet.ResNet()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        net.to_gpu()

    # Dataset: the mean is taken over everything returned by the loader
    images, targets = load_cifar10_dataset(args.path)
    pixel_mean = np.mean(images, axis=0)
    split, end = 45000, 50000
    train_set = Cifar10Dataset(images[0:split],
                               targets[0:split],
                               train=True,
                               mean=pixel_mean)
    val_set = Cifar10Dataset(images[split:end],
                             targets[split:end],
                             train=False,
                             mean=pixel_mean)

    # Iterators
    train_iter = chainer.iterators.MultiprocessIterator(
        train_set, args.batchsize, n_processes=args.loaders)
    val_iter = chainer.iterators.MultiprocessIterator(
        val_set,
        args.batchsize,
        repeat=False,
        shuffle=False,
        n_processes=args.loaders)

    # Optimizer: momentum SGD with weight decay
    sgd = chainer.optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    sgd.setup(net)
    sgd.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # Trainer
    updater = training.StandardUpdater(train_iter, sgd, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # Validation, snapshots and logging all share the same schedule.
    if args.test:
        every = (10, 'iteration')
    else:
        every = (1, 'epoch')

    # Validation
    evaluator = extensions.Evaluator(val_iter, net, device=args.gpu)
    trainer.extend(evaluator, trigger=every)
    snapshot = extensions.snapshot_object(
        net, 'model_iter_{.updater.iteration}')
    trainer.extend(snapshot, trigger=every)

    # Learning rate decay: multiply lr by 0.1 at the listed iterations
    decay_trigger = training.triggers.ManualScheduleTrigger(
        [32000, 48000], 'iteration')
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=decay_trigger)

    # Log
    report_columns = [
        'epoch', 'iteration', 'lr', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.observe_lr(), trigger=every)
    trainer.extend(extensions.LogReport(trigger=every))
    trainer.extend(extensions.PrintReport(report_columns), trigger=every)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Exemplo n.º 22
0
def train():
    """Train a ResNet on ImageNet with the TensorFlow 1.x graph/session API.

    All settings are read from the module-level ``FLAGS``. The function
    prints the configuration, builds a training network and a
    weight-sharing validation network, optionally restores a checkpoint
    or a base model, and then runs the training loop: validation every
    ``FLAGS.val_interval`` steps, console/summary output every
    ``FLAGS.display`` steps, and a checkpoint every
    ``FLAGS.checkpoint_interval`` steps and at the final step.
    """
    print('[Dataset Configuration]')
    print('\tImageNet training root: %s' % FLAGS.train_image_root)
    print('\tImageNet training list: %s' % FLAGS.train_dataset)
    print('\tImageNet val root: %s' % FLAGS.val_image_root)
    print('\tImageNet val list: %s' % FLAGS.val_dataset)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of training images: %d' % FLAGS.num_train_instance)
    print('\tNumber of val images: %d' % FLAGS.num_val_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)
    print('\tNumber of GPUs: %d' % FLAGS.num_gpus)
    print('\tBasemodel file: %s' % FLAGS.basemodel)

    print('[Optimization Configuration]')
    print('\tL2 loss weight: %f' % FLAGS.l2_weight)
    print('\tThe momentum optimizer: %f' % FLAGS.momentum)
    print('\tInitial learning rate: %f' % FLAGS.initial_lr)
    print('\tEpochs per lr step: %s' % FLAGS.lr_step_epoch)
    print('\tLearning rate decay: %f' % FLAGS.lr_decay)

    print('[Training Configuration]')
    print('\tTrain dir: %s' % FLAGS.train_dir)
    print('\tTraining max steps: %d' % FLAGS.max_steps)
    print('\tSteps per displaying info: %d' % FLAGS.display)
    print('\tSteps per validation: %d' % FLAGS.val_interval)
    print('\tSteps during validation: %d' % FLAGS.val_iter)
    print('\tSteps per saving checkpoints: %d' % FLAGS.checkpoint_interval)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    with tf.Graph().as_default():
        init_step = 0
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get images and labels of ImageNet
        import multiprocessing
        # Floor division keeps the thread count an integer on Python 3
        # (true division would yield a float); never go below one thread.
        num_threads = max(1, multiprocessing.cpu_count() // FLAGS.num_gpus)
        print('Load ImageNet dataset(%d threads)' % num_threads)
        with tf.device('/cpu:0'):
            print('\tLoading training data from %s' % FLAGS.train_dataset)
            with tf.variable_scope('train_image'):
                train_images, train_labels = data_input.distorted_inputs(
                    FLAGS.train_image_root,
                    FLAGS.train_dataset,
                    FLAGS.batch_size,
                    True,
                    num_threads=num_threads,
                    num_sets=FLAGS.num_gpus)
            print('\tLoading validation data from %s' % FLAGS.val_dataset)
            with tf.variable_scope('test_image'):
                val_images, val_labels = data_input.inputs(
                    FLAGS.val_image_root,
                    FLAGS.val_dataset,
                    FLAGS.batch_size,
                    False,
                    num_threads=num_threads,
                    num_sets=FLAGS.num_gpus)
            tf.summary.image('images', train_images[0][:2])

        # Build model
        # Convert the comma-separated decay epochs into global step counts:
        # epoch * images_per_epoch / batch_size / num_gpus.
        lr_decay_steps = [
            int(float(epoch_str) * FLAGS.num_train_instance /
                FLAGS.batch_size / FLAGS.num_gpus)
            for epoch_str in FLAGS.lr_step_epoch.split(',')
        ]
        hp = resnet.HParams(batch_size=FLAGS.batch_size,
                            num_gpus=FLAGS.num_gpus,
                            num_classes=FLAGS.num_classes,
                            weight_decay=FLAGS.l2_weight,
                            momentum=FLAGS.momentum,
                            finetune=FLAGS.finetune)
        network_train = resnet.ResNet(hp,
                                      train_images,
                                      train_labels,
                                      global_step,
                                      name="train")
        network_train.build_model()
        network_train.build_train_op()
        # Merged before the validation network is built, so only summaries
        # registered so far are included.
        train_summary_op = tf.summary.merge_all()  # Summaries(training)
        network_val = resnet.ResNet(hp,
                                    val_images,
                                    val_labels,
                                    global_step,
                                    name="val",
                                    reuse_weights=True)
        network_val.build_model()
        print('Number of Weights: %d' % network_train._weights)
        print('FLOPs: %d' % network_train._flops)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            allow_soft_placement=False,
            # allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10000)
        if FLAGS.checkpoint is not None:
            # Resume: restore everything and continue from the saved step.
            print('Load checkpoint %s' % FLAGS.checkpoint)
            saver.restore(sess, FLAGS.checkpoint)
            init_step = global_step.eval(session=sess)
        elif FLAGS.basemodel:
            # Fine-tune: restore everything except optimizer momentum slots
            # and the step counter, so training starts at step 0.
            print('Load parameters from basemodel %s' % FLAGS.basemodel)
            variables = tf.global_variables()
            vars_restore = [
                var for var in variables
                if "Momentum" not in var.name
                and "global_step" not in var.name
            ]
            saver_restore = tf.train.Saver(vars_restore, max_to_keep=10000)
            saver_restore.restore(sess, FLAGS.basemodel)
        else:
            print(
                'No checkpoint file of basemodel found. Start from the scratch.'
            )

        # Start queue runners & summary_writer
        tf.train.start_queue_runners(sess=sess)

        if not os.path.exists(FLAGS.train_dir):
            os.mkdir(FLAGS.train_dir)
        # Summaries go to a sub-directory named after the starting step.
        summary_writer = tf.summary.FileWriter(
            os.path.join(FLAGS.train_dir, str(global_step.eval(session=sess))),
            sess.graph)

        # Training!
        val_best_acc = 0.0
        for step in range(init_step, FLAGS.max_steps):
            # val
            if step % FLAGS.val_interval == 0:
                val_loss, val_acc = 0.0, 0.0
                for _ in range(FLAGS.val_iter):
                    loss_value, acc_value = sess.run(
                        [network_val.loss, network_val.acc],
                        feed_dict={network_val.is_train: False})
                    val_loss += loss_value
                    val_acc += acc_value
                # Average over the validation batches.
                val_loss /= FLAGS.val_iter
                val_acc /= FLAGS.val_iter
                val_best_acc = max(val_best_acc, val_acc)
                format_str = ('%s: (val)     step %d, loss=%.4f, acc=%.4f')
                print(format_str % (datetime.now(), step, val_loss, val_acc))

                val_summary = tf.Summary()
                val_summary.value.add(tag='val/loss', simple_value=val_loss)
                val_summary.value.add(tag='val/acc', simple_value=val_acc)
                val_summary.value.add(tag='val/best_acc',
                                      simple_value=val_best_acc)
                summary_writer.add_summary(val_summary, step)
                summary_writer.flush()

            # Train
            lr_value = get_lr(FLAGS.initial_lr, FLAGS.lr_decay, lr_decay_steps,
                              step)
            start_time = time.time()
            # For timeline profiling, run a single step with
            # tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) and a
            # tf.RunMetadata(), then dump
            # timeline.Timeline(run_metadata.step_stats)
            #     .generate_chrome_trace_format() to a JSON file.
            _, loss_value, acc_value, train_summary_str = sess.run(
                [
                    network_train.train_op, network_train.loss,
                    network_train.acc, train_summary_op
                ],
                feed_dict={
                    network_train.is_train: True,
                    network_train.lr: lr_value
                })
            duration = time.time() - start_time

            # Abort as soon as the loss diverges.
            assert not np.isnan(loss_value)

            # Display & Summary(training)
            if step % FLAGS.display == 0 or step < 10:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: (Training) step %d, loss=%.4f, acc=%.4f, lr=%f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str %
                      (datetime.now(), step, loss_value, acc_value, lr_value,
                       examples_per_sec, sec_per_batch))
                summary_writer.add_summary(train_summary_str, step)

            # Save the model checkpoint periodically.
            if (step > init_step and step % FLAGS.checkpoint_interval
                    == 0) or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            # Non-blocking poll of stdin: a 'b' typed on the console calls
            # embed() (presumably an interactive shell -- defined elsewhere).
            if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                char = sys.stdin.read(1)
                if char == 'b':
                    embed()
Exemplo n.º 23
0
def main():
    """Train a ResNet/PyramidNet classifier on CIFAR-10/100 or ImageNet.

    Options are parsed from the module-level ``parser`` into the global
    ``args``. Each epoch adjusts the learning rate, trains, validates, and
    saves a checkpoint; the lowest top-1 error seen so far (and the top-5
    error of that epoch) are kept in the globals ``best_err1`` and
    ``best_err5``.

    Raises:
        Exception: if ``args.dataset`` or ``args.net_type`` is not supported.
    """
    global args, best_err1, best_err5
    args = parser.parse_args()

    def make_loader(dataset, shuffle):
        # All loaders share batch size, worker count and pinned memory.
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=args.batch_size,
            shuffle=shuffle,
            num_workers=args.workers,
            pin_memory=True)

    dataset_name = args.dataset

    if dataset_name.startswith('cifar'):
        cifar_norm = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        augment = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            cifar_norm,
        ])
        plain = transforms.Compose([transforms.ToTensor(), cifar_norm])

        # Pick the torchvision dataset class and its class count.
        if dataset_name == 'cifar100':
            cifar_cls = datasets.CIFAR100
            numberofclass = 100
        elif dataset_name == 'cifar10':
            cifar_cls = datasets.CIFAR10
            numberofclass = 10
        else:
            raise Exception('unknown dataset: {}'.format(args.dataset))

        train_loader = make_loader(
            cifar_cls('../data', train=True, download=True,
                      transform=augment),
            shuffle=True)
        val_loader = make_loader(
            cifar_cls('../data', train=False, transform=plain),
            shuffle=True)

    elif dataset_name == 'imagenet':
        traindir = '/home/data/ILSVRC/train'
        valdir = '/home/data/ILSVRC/val'
        imagenet_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])

        color_jitter = utils.ColorJitter(brightness=0.4,
                                         contrast=0.4,
                                         saturation=0.4)
        pca_lighting = utils.Lighting(
            alphastd=0.1,
            eigval=[0.2175, 0.0188, 0.0045],
            eigvec=[[-0.5675, 0.7192, 0.4009],
                    [-0.5808, -0.0045, -0.8140],
                    [-0.5836, -0.6948, 0.4203]])

        train_pipeline = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            color_jitter,
            pca_lighting,
            imagenet_norm,
        ])
        val_pipeline = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            imagenet_norm,
        ])

        # No custom sampler is used, so the training data is shuffled.
        train_loader = make_loader(
            datasets.ImageFolder(traindir, train_pipeline), shuffle=True)
        val_loader = make_loader(
            datasets.ImageFolder(valdir, val_pipeline), shuffle=False)
        numberofclass = 1000

    else:
        raise Exception('unknown dataset: {}'.format(args.dataset))

    print("=> creating model '{}'".format(args.net_type))
    if args.net_type == 'resnet':
        net = RN.ResNet(args.dataset, args.depth, numberofclass,
                        args.bottleneck)
    elif args.net_type == 'pyramidnet':
        net = PYRM.PyramidNet(args.dataset, args.depth, args.alpha,
                              numberofclass, args.bottleneck)
    else:
        raise Exception('unknown network architecture: {}'.format(args.net_type))

    model = torch.nn.DataParallel(net).cuda()

    print(model)
    param_count = sum([p.data.nelement() for p in model.parameters()])
    print('the number of model parameters: {}'.format(param_count))

    # Loss and optimizer (SGD with Nesterov momentum)
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    cudnn.benchmark = True

    for epoch in range(0, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # One training pass, then one validation pass.
        train(train_loader, model, criterion, optimizer, epoch)
        err1, err5, _ = validate(val_loader, model, criterion, epoch)

        # Track the best top-1 error; top-5 follows the best top-1 epoch.
        is_best = err1 <= best_err1
        best_err1 = min(err1, best_err1)
        if is_best:
            best_err5 = err5

        print('Current best accuracy (top-1 and 5 error):', best_err1, best_err5)
        snapshot = {
            'epoch': epoch,
            'arch': args.net_type,
            'state_dict': model.state_dict(),
            'best_err1': best_err1,
            'best_err5': best_err5,
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(snapshot, is_best)

    print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)
Exemplo n.º 24
0
def train():
    """Shrink a restored ResNet by kernel clustering and report test accuracy.

    Despite its name this routine never runs a training op.  It works in two
    phases, each inside its own ``tf.Graph``:

    1. Build the network described by ``FLAGS``, restore it from
       ``FLAGS.ckpt_path`` (a checkpoint directory or a single checkpoint
       file) and compute replacement parameters for every residual unit with
       ``cluster_kernel``, ``add_kernel`` and ``cluster_batch_norm``.
    2. Build a second network from those parameters and ``FLAGS.new_k``, feed
       it ``FLAGS.test_iter`` batches and print a per-class table of correct
       and wrong predictions.  The table is also written to ``FLAGS.output``
       when that flag is not blank.

    The process exits with status 1 when no checkpoint is found.
    """
    print('[Dataset Configuration]')
    print('\tCIFAR-100 dir: %s' % FLAGS.data_dir)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of test images: %d' % FLAGS.num_test_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)
    print('\tResidual blocks per group: %d' % FLAGS.num_residual_units)
    print('\tNetwork width multiplier: %d' % FLAGS.k)

    print('[Testing Configuration]')
    print('\tCheckpoint path: %s' % FLAGS.ckpt_path)
    print('\tDataset: %s' % ('Training' if FLAGS.train_data else 'Test'))
    print('\tNumber of testing iterations: %d' % FLAGS.test_iter)
    print('\tOutput path: %s' % FLAGS.output)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    # ---- Phase 1: restore the original network and derive new parameters.
    with tf.Graph().as_default():

        # Build a Graph that computes the predictions from the inference model.
        images = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, data_input.HEIGHT, data_input.WIDTH, 3])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        # Build model
        decay_step = FLAGS.lr_step_epoch * FLAGS.num_train_instance / FLAGS.batch_size
        hp = resnet.HParams(batch_size=FLAGS.batch_size,
                            num_classes=FLAGS.num_classes,
                            num_residual_units=FLAGS.num_residual_units,
                            k=FLAGS.k,
                            weight_decay=FLAGS.l2_weight,
                            initial_lr=FLAGS.initial_lr,
                            decay_step=decay_step,
                            lr_decay=FLAGS.lr_decay,
                            momentum=FLAGS.momentum)
        network = resnet.ResNet(hp, images, labels, None)
        network.build_model()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Create a saver and restore from a checkpoint directory or file.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10000)
        if os.path.isdir(FLAGS.ckpt_path):
            ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_path)
            # Restores from checkpoint
            if ckpt and ckpt.model_checkpoint_path:
                print('\tRestore from %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found in the dir [%s]' %
                      FLAGS.ckpt_path)
                sys.exit(1)
        elif os.path.isfile(FLAGS.ckpt_path):
            print('\tRestore from %s' % FLAGS.ckpt_path)
            saver.restore(sess, FLAGS.ckpt_path)
        else:
            print('No checkpoint file found in the path [%s]' %
                  FLAGS.ckpt_path)
            sys.exit(1)

        # Collect, per residual unit, the first kernel (to be clustered), the
        # second kernel (to be merged along the clusters) and a batch norm.
        graph = tf.get_default_graph()
        block_num = 3
        old_kernels_to_cluster = []
        old_kernels_to_add = []
        old_batch_norm = []
        for i in range(1, block_num + 1):
            for j in range(FLAGS.num_residual_units):
                old_kernels_to_cluster.append(get_kernel(i, j, 1, graph, sess))
                old_kernels_to_add.append(get_kernel(i, j, 2, graph, sess))
                old_batch_norm.append(get_batch_norm(i, j, 2, graph, sess))

        new_params = []
        new_width = [
            16,
            int(16 * FLAGS.new_k),
            int(32 * FLAGS.new_k),
            int(64 * FLAGS.new_k)
        ]
        units = zip(old_kernels_to_cluster, old_kernels_to_add, old_batch_norm)
        for unit_index, (to_cluster, to_add, batch_norm) in enumerate(units):
            # Units were appended block by block above, so the owning block is
            # the unit index divided by the units per block (this used to be a
            # hard-coded 4, which is only right for num_residual_units == 4).
            cluster_num = new_width[unit_index // FLAGS.num_residual_units + 1]
            cluster_kernels, cluster_indices = cluster_kernel(
                to_cluster, cluster_num)
            add_kernels = add_kernel(to_add, cluster_indices, cluster_num)
            cluster_batchs_norm = cluster_batch_norm(batch_norm,
                                                     cluster_indices,
                                                     cluster_num)
            # Append order must match the order in which the matching
            # variables are visited in tf.global_variables() below.
            new_params.append(cluster_kernels)
            for p in range(BATCH_NORM_PARAM_NUM):
                new_params.append(cluster_batchs_norm[p])
            new_params.append(add_kernels)

        # save variables: (value, name) pairs for every global variable, using
        # the clustered value where the name matches the update pattern.
        init_params = []
        new_param_index = 0
        for var in tf.global_variables():
            update_match = UPDATE_PARAM_REGEX.match(var.name)
            skip_match = SKIP_PARAM_REGEX.match(var.name)
            if update_match and not skip_match:
                print("update {}".format(var.name))
                init_params.append((new_params[new_param_index], var.name))
                new_param_index += 1
            else:
                print("not update {}".format(var.name))
                var_vector = sess.run(var)
                init_params.append((var_vector, var.name))

        #close old graph
        sess.close()
    tf.reset_default_graph()

    # ---- Phase 2: build new graph and eval
    with tf.Graph().as_default():
        # The CIFAR-100 dataset
        with tf.variable_scope('test_image'):
            test_images, test_labels = data_input.input_fn(
                FLAGS.data_dir,
                FLAGS.batch_size,
                train_mode=FLAGS.train_data,
                num_threads=1)

        # The class labels
        with open(os.path.join(FLAGS.data_dir, 'fine_label_names.txt')) as fd:
            classes = [line.strip() for line in fd]

        images = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, data_input.HEIGHT, data_input.WIDTH, 3])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        new_network = resnet.ResNet(hp, images, labels, None, init_params,
                                    FLAGS.new_k)
        new_network.build_model()

        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # NOTE(review): no tf.train.start_queue_runners() call is made here.
        # If data_input.input_fn is queue based, the first sess.run on
        # test_images below would block -- confirm against data_input.

        # Testing!
        # result_ll[c] == [correct count, wrong count] for class c.
        result_ll = [[0, 0] for _ in range(FLAGS.num_classes)]
        test_loss = 0.0
        for _ in range(FLAGS.test_iter):
            test_images_val, test_labels_val = sess.run(
                [test_images, test_labels])
            preds_val, loss_value = sess.run(
                [new_network.preds, new_network.loss],
                feed_dict={
                    new_network.is_train: False,
                    images: test_images_val,
                    labels: test_labels_val
                })
            test_loss += loss_value
            for j in range(FLAGS.batch_size):
                # Column 0 counts hits, column 1 counts misses.
                column = 0 if test_labels_val[j] == preds_val[j] else 1
                result_ll[test_labels_val[j] % FLAGS.num_classes][column] += 1
        if FLAGS.test_iter:
            test_loss /= FLAGS.test_iter

        # Summary display & output
        # Classes that never appeared get an accuracy of 0.0 instead of
        # raising ZeroDivisionError.
        acc_list = [
            float(r[0]) / float(r[0] + r[1]) if (r[0] + r[1]) else 0.0
            for r in result_ll
        ]
        result_total = np.sum(np.array(result_ll), axis=0)
        total_count = np.sum(result_total)
        acc_total = float(result_total[0]) / total_count if total_count else 0.0

        print('Class    \t\t\tT\tF\tAcc.')
        format_str = '%-31s %7d %7d %.5f'
        for i in range(FLAGS.num_classes):
            print(format_str %
                  (classes[i], result_ll[i][0], result_ll[i][1], acc_list[i]))
        print(format_str %
              ('(Total)', result_total[0], result_total[1], acc_total))
        print('Test loss: %.5f' % test_loss)

        # Output to file(if specified)
        if FLAGS.output.strip():
            file_format_str = '%-31s %7d %7d %.5f\n'
            with open(FLAGS.output, 'w') as fd:
                fd.write('Class    \t\t\tT\tF\tAcc.\n')
                for i in range(FLAGS.num_classes):
                    t, f = result_ll[i]
                    fd.write(file_format_str %
                             (classes[i].replace(' ', '-'), t, f, acc_list[i]))
                fd.write(
                    file_format_str %
                    ('(Total)', result_total[0], result_total[1], acc_total))

        sess.close()
Exemplo n.º 25
0
def train():
    """Build the multi-output ResNet and train it, test it, or run it once.

    The mode is chosen from flags, checked in this order:

    * ``FLAGS.is_CustomTest`` -- run the network on ``Input/gx.npy`` and save
      the concatenated outputs to ``tmp/gx.txt``.
    * ``FLAGS.is_Train`` false -- run 10 test batches; for each one save the
      outputs of its first sample to ``tmp/<i>.txt`` and the corresponding
      input image to ``tmp/<i>.jpg``.
    * otherwise -- train from the restored global step up to
      ``FLAGS.max_steps``, saving checkpoints into ``FLAGS.train_dir``.

    In every mode the newest checkpoint in ``FLAGS.train_dir`` is restored
    first when one exists.
    """

    def Load():
        """Load the ``Input/*.npy`` arrays used for training and testing.

        The "training" arrays are the test arrays when ``FLAGS.is_Simple`` is
        set or when not training; otherwise they are the training arrays
        shuffled with one shared random permutation.
        """
        if (FLAGS.is_Simple or FLAGS.is_Train == False):
            train_images = np.load("Input/test_data.npy")
            train_labels = np.load("Input/test_label.npy")
        else:
            train_data = np.load("Input/train_data.npy")
            train_label = np.load("Input/train_label.npy")
            permutation = np.random.permutation(train_data.shape[0])
            train_images = train_data[permutation, :, :, :]
            train_labels = train_label[permutation, :]
        test_data = np.load("Input/test_data.npy")
        test_label = np.load("Input/test_label.npy")
        mean_data = np.array(np.load("Input/mean_data.npy"), dtype=np.float16)
        mean_label = np.load("Input/mean_label.npy")
        std_label = np.load("Input/std_label.npy")

        return train_images, train_labels, test_data, test_label, mean_data, mean_label, std_label

    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        X = tf.placeholder(tf.float32, [None, 224, 224, 3],
                           name='Input_Images')
        SHAPE = tf.placeholder(tf.float32, [None, 100], name='SHAPE')
        EXP = tf.placeholder(tf.float32, [None, 79], name='EXP')
        EULAR = tf.placeholder(tf.float32, [None, 3], name='EULAR')
        T = tf.placeholder(tf.float32, [None, 2], name='T')
        S = tf.placeholder(tf.float32, [None], name='S')

        # Build model
        hp = resnet.HParams(batch_size=FLAGS.batch_size,
                            num_gpus=FLAGS.num_gpus,
                            num_output=FLAGS.dim_output,
                            weight_decay=FLAGS.l2_weight,
                            momentum=FLAGS.momentum,
                            finetune=FLAGS.finetune)

        network_train = resnet.ResNet(hp,
                                      X,
                                      SHAPE,
                                      EXP,
                                      EULAR,
                                      T,
                                      S,
                                      global_step,
                                      name="train")
        network_train.build_model()
        network_train.build_train_op()
        # Summaries(training); currently not written anywhere in this routine.
        train_summary_op = tf.summary.merge_all()
        print('Number of Weights: %d' % network_train._weights)
        print('FLOPs: %d' % network_train._flops)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        print("sess 0")
        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            allow_soft_placement=False,
            log_device_placement=FLAGS.log_device_placement))
        print("sess 1")
        sess.run(init)
        print("sess done")
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)

        # Restore the newest checkpoint, if there is one.  Training and
        # testing used two identical copies of this code, and both printed
        # ``checkpoints_name`` even when nothing had been restored, which
        # raised UnboundLocalError on a fresh ``train_dir``.
        checkpoint_dir = FLAGS.train_dir
        checkpoints = tf.train.get_checkpoint_state(checkpoint_dir)
        if checkpoints and checkpoints.model_checkpoint_path:
            checkpoints_name = os.path.basename(
                checkpoints.model_checkpoint_path)
            saver.restore(sess,
                          os.path.join(checkpoint_dir, checkpoints_name))
            print('Load checkpoint %s' % checkpoints_name)
        else:
            print('No checkpoint found in %s. Start from the scratch.' %
                  checkpoint_dir)
        # Read back the step counter (still its initial value when nothing
        # was restored).
        init_step = global_step.eval(session=sess)

        def label_feed(batch_labels):
            """Split the packed 185-column label matrix across placeholders."""
            return {
                SHAPE: batch_labels[:, :100],
                EXP: batch_labels[:, 100:179],
                EULAR: batch_labels[:, 179:182],
                T: batch_labels[:, 182:184],
                S: batch_labels[:, 184],
            }

        def predict(batch_data, batch_labels):
            """Fetch the five ``*_logits`` tensors with ``is_train`` fed False."""
            feed = {network_train.is_train: False, X: batch_data}
            feed.update(label_feed(batch_labels))
            return sess.run(
                [network_train.shape_logits, network_train.exp_logits,
                 network_train.eular_logits, network_train.t_logits,
                 network_train.s_logits],
                feed_dict=feed)

        # Training!
        (train_images, train_labels, test_data, test_label, mean_data,
         mean_label, std_label) = Load()
        one_epoch_step = int(len(train_labels) / FLAGS.batch_size)
        # Inputs are mean-centred and scaled by 255; labels are used as-is.
        train_images = (train_images - mean_data) / 255.0
        test_data = (test_data - mean_data) / 255.0
        print("data done")

        if (FLAGS.is_CustomTest == True):
            batch_data = (np.load('Input/gx.npy') - mean_data) / 255.0
            # Label values are fed as zeros in this mode.
            batch_labels = np.zeros((len(batch_data), 185))
            tmp = np.zeros((len(batch_data), 185))
            shape_logits, exp_logits, eular_logits, t_logits, s_logits = predict(
                batch_data, batch_labels)
            # NOTE(review): exp_logits lands in columns 0:100 and shape_logits
            # in 100:179, the reverse of how SHAPE/EXP are sliced from the
            # labels above -- confirm the output widths in resnet.ResNet.
            tmp[:, 0:100] = np.array(exp_logits)
            tmp[:, 100:179] = np.array(shape_logits)
            tmp[:, 179:182] = np.array(eular_logits)
            tmp[:, 182:184] = np.array(t_logits)
            tmp[:, 184][:, None] = np.array(s_logits)
            np.savetxt("tmp/gx.txt", tmp)

        elif (FLAGS.is_Train == False):

            max_iteration = int(len(test_label) / FLAGS.batch_size)
            print("max iteration is " + str(max_iteration))
            tmp = np.zeros([185])
            # Only the first 10 batches are dumped, regardless of
            # max_iteration.
            for i in range(10):
                print(i)
                # Wrap around so a full batch can always be sliced.
                offset = (i * FLAGS.batch_size) % (test_data.shape[0] -
                                                   FLAGS.batch_size)
                batch_data = test_data[offset:(offset +
                                               FLAGS.batch_size), :, :, :]
                batch_labels = test_label[offset:(offset +
                                                  FLAGS.batch_size), :]

                shape_logits, exp_logits, eular_logits, t_logits, s_logits = predict(
                    batch_data, batch_labels)
                # Keep the outputs of the first sample of the batch only.
                # NOTE(review): same exp/shape column order as the custom
                # test branch -- confirm the output widths in resnet.ResNet.
                tmp[0:100] = np.array(exp_logits[0, :])
                tmp[100:179] = np.array(shape_logits[0, :])
                tmp[179:182] = np.array(eular_logits[0, :])
                tmp[182:184] = np.array(t_logits[0, :])
                tmp[184] = np.array(s_logits[0, :])

                np.savetxt("tmp/" + str(i) + ".txt", tmp)
                # Undo the input normalisation before saving the image.
                fig = np.array((batch_data[0, :, :, :] * 255 + mean_data),
                               dtype=np.uint8)
                cv2.imwrite("tmp/" + str(i) + ".jpg", fig)

        else:

            for step in range(init_step, FLAGS.max_steps):

                # Walk through the data in order, wrapping around so a full
                # batch can always be sliced.
                offset = (step * FLAGS.batch_size) % (train_labels.shape[0] -
                                                      FLAGS.batch_size)
                batch_data = train_images[offset:(offset +
                                                  FLAGS.batch_size), :, :, :]
                batch_labels = train_labels[offset:(offset +
                                                    FLAGS.batch_size), :]
                # Train
                lr_value = get_lr(FLAGS.initial_lr, FLAGS.lr_decay,
                                  one_epoch_step, step)
                train_feed = {
                    network_train.is_train: True,
                    network_train.lr: lr_value,
                    X: batch_data,
                }
                train_feed.update(label_feed(batch_labels))
                start_time = time.time()
                (_, loss_value, shape_loss, exp_loss, eular_loss, t_loss,
                 s_loss, points_loss, geo_loss, pose_loss) = sess.run(
                    [
                        network_train.train_op, network_train.loss,
                        network_train.shape_loss, network_train.exp_loss,
                        network_train.eular_loss, network_train.t_loss,
                        network_train.s_loss, network_train.points_loss,
                        network_train.geo_loss, network_train.pose_loss
                    ],
                    feed_dict=train_feed)

                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'loss became NaN'

                # Display & Summary(training)
                if step % FLAGS.display == 0 or step < 10:
                    num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    format_str = (
                        '%s: (Training) step %d, loss=%.4f, lr=%f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str %
                          (datetime.now(), step, loss_value, lr_value,
                           examples_per_sec, sec_per_batch))

                    format_str = (
                        'shape_loss=%.4f, exp_loss=%.4f,eular_loss=%.4f,t_loss=%.4f,s_loss=%.4f,points_loss=%.4f,geo_loss=%.4f,pose_loss=%.4f'
                    )
                    print(format_str %
                          (shape_loss, exp_loss, eular_loss, t_loss, s_loss,
                           points_loss, geo_loss, pose_loss))
                    # Estimate the remaining time from this step's wall time.
                    elapse = time.time() - start_time
                    time_left = (FLAGS.max_steps - step) * elapse
                    print("\tTime left: %02d:%02d:%02d" %
                          (int(time_left / 3600), int(
                              time_left % 3600 / 60), time_left % 60))

                # Save the model checkpoint periodically.
                if (step > init_step and step % FLAGS.checkpoint_interval
                        == 0) or (step + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

        sess.close()
Exemplo n.º 26
0
# Build the model on fixed-size placeholders (batch of 2, 224x224 RGB), print
# its weight and FLOP counts, then open a session and initialise variables.
with tf.Graph().as_default():
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # The network receives lists of tensors; each list holds one placeholder.
    images = [tf.placeholder(tf.float32, [2, 224, 224, 3])]
    labels = [tf.placeholder(tf.int32, [2])]

    # Build model
    print("Build ResNet-18 model")
    hp = resnet.HParams(
        batch_size=2,
        num_gpus=1,
        num_classes=1000,
        weight_decay=0.001,
        momentum=0.9,
        finetune=False,
    )
    network_train = resnet.ResNet(
        hp, images, labels, global_step, name="train")
    network_train.build_model()
    print('Number of Weights: %d' % network_train._weights)
    print('FLOPs: %d' % network_train._flops)

    # Initialisation op, created after the model so it covers its variables.
    init = tf.global_variables_initializer()

    # Session that may claim up to 96% of GPU memory and lets ops fall back
    # to another device when their requested one is unavailable.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.96)
    session_config = tf.ConfigProto(
        gpu_options=gpu_options,
        allow_soft_placement=True,
        log_device_placement=False,
    )
    sess = tf.Session(config=session_config)
    sess.run(init)
Exemplo n.º 27
0
# One endlessly cycling, shuffled loader per entry of train_datasets.  The
# batch sizes come from args.bst: either one value per class or a single
# value shared by all classes.
n_classes = len(train_yy)
if len(args.bst) == n_classes:
    bs_list = args.bst
elif len(args.bst) == 1:
    bs_list = [args.bst[0]] * n_classes
else:
    raise RuntimeError('expected 1 or %d batch sizes in args.bst, got %d' %
                       (n_classes, len(args.bst)))
train_loaders = [utils.cycle(D.DataLoader(ds, bs, shuffle=True))
                 for ds, bs in zip(train_datasets, bs_list)]

# Model selection.
if args.model == 'linear':
    model = th.nn.Linear(train_x.size(1), n_classes)
elif args.model == 'mlp':
    model = mlp.MLP([train_x.size(1), 64, 64, 64, n_classes], th.relu, bn=True)
elif args.model == 'resnet':
    # Use the constructed network directly.  It used to be subscripted with
    # ``[args.model]``, which looks like a leftover from a dict dispatch.
    model = resnet.ResNet(18, n_classes)
else:
    raise RuntimeError('unknown model: %s' % args.model)
dev = th.device('cpu') if args.gpu < 0 else th.device('cuda:%d' % args.gpu)
model = model.to(dev)
params = list(model.parameters())
kwargs = {'params' : params, 'lr' : args.lr, 'weight_decay' : args.wd}
# Build only the requested optimizer; an unknown name still raises KeyError.
optimizer_factories = {
    'sgd': lambda: optim.SGD(**kwargs),
    'adam': lambda: optim.Adam(amsgrad=True, **kwargs),
}
opt = optimizer_factories[args.opt]()
metric = getattr(utils, args.metric)

# Optional summary writers: one at the run directory plus two
# sub-directories, 'a' for training and 'b' for validation.
if args.tb:
    path = 'tb/%s' % args.id
    writer = tb.SummaryWriter(path)
    train_writer = tb.SummaryWriter(path + '/a')
    val_writer = tb.SummaryWriter(path + '/b')
Exemplo n.º 28
0
def train():
    """Shrink a restored ResNet by kernel clustering, then fine-tune it.

    The routine works in two phases, each inside its own ``tf.Graph``:

    1. Build the network described by ``FLAGS``, restore it from
       ``FLAGS.ckpt_path`` (a checkpoint directory or a single checkpoint
       file) and compute replacement parameters for every residual unit with
       ``cluster_kernel``, ``add_kernel`` and ``cluster_batch_norm``.
    2. Build a second network from those parameters and ``FLAGS.new_k`` and
       train it up to ``FLAGS.max_steps``, testing every
       ``FLAGS.test_interval`` steps and writing summaries and checkpoints to
       ``FLAGS.train_dir``.  A checkpoint already present in that directory
       is restored first and training resumes from its step number.

    The process exits with status 1 when no source checkpoint is found.
    """
    print('[Dataset Configuration]')
    print('\tCIFAR-100 dir: %s' % FLAGS.data_dir)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of test images: %d' % FLAGS.num_test_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)
    print('\tResidual blocks per group: %d' % FLAGS.num_residual_units)
    print('\tNetwork width multiplier: %d' % FLAGS.k)

    print('[Optimization Configuration]')
    print('\tL2 loss weight: %f' % FLAGS.l2_weight)
    print('\tThe momentum optimizer: %f' % FLAGS.momentum)
    print('\tInitial learning rate: %f' % FLAGS.initial_lr)
    print('\tEpochs per lr step: %f' % FLAGS.lr_step_epoch)
    print('\tLearning rate decay: %f' % FLAGS.lr_decay)

    print('[Training Configuration]')
    print('\tTrain dir: %s' % FLAGS.train_dir)
    print('\tTraining max steps: %d' % FLAGS.max_steps)
    print('\tSteps per displaying info: %d' % FLAGS.display)
    print('\tSteps per testing: %d' % FLAGS.test_interval)
    print('\tSteps during testing: %d' % FLAGS.test_iter)
    print('\tSteps per saving checkpoints: %d' % FLAGS.checkpoint_interval)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    # ---- Phase 1: restore the original network and derive new parameters.
    with tf.Graph().as_default():

        # Build a Graph that computes the predictions from the inference model.
        images = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, data_input.HEIGHT, data_input.WIDTH, 3])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        # Build model
        decay_step = FLAGS.lr_step_epoch * FLAGS.num_train_instance / FLAGS.batch_size
        hp = resnet.HParams(batch_size=FLAGS.batch_size,
                            num_classes=FLAGS.num_classes,
                            num_residual_units=FLAGS.num_residual_units,
                            k=FLAGS.k,
                            weight_decay=FLAGS.l2_weight,
                            initial_lr=FLAGS.initial_lr,
                            decay_step=decay_step,
                            lr_decay=FLAGS.lr_decay,
                            momentum=FLAGS.momentum)
        network = resnet.ResNet(hp, images, labels, None)
        network.build_model()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Create a saver and restore from a checkpoint directory or file.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10000)
        if os.path.isdir(FLAGS.ckpt_path):
            ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_path)
            # Restores from checkpoint
            if ckpt and ckpt.model_checkpoint_path:
                print('\tRestore from %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found in the dir [%s]' %
                      FLAGS.ckpt_path)
                sys.exit(1)
        elif os.path.isfile(FLAGS.ckpt_path):
            print('\tRestore from %s' % FLAGS.ckpt_path)
            saver.restore(sess, FLAGS.ckpt_path)
        else:
            print('No checkpoint file found in the path [%s]' %
                  FLAGS.ckpt_path)
            sys.exit(1)

        # Collect, per residual unit, the first kernel (to be clustered), the
        # second kernel (to be merged along the clusters) and a batch norm.
        graph = tf.get_default_graph()
        block_num = 3
        old_kernels_to_cluster = []
        old_kernels_to_add = []
        old_batch_norm = []
        for i in range(1, block_num + 1):
            for j in range(FLAGS.num_residual_units):
                old_kernels_to_cluster.append(get_kernel(i, j, 1, graph, sess))
                old_kernels_to_add.append(get_kernel(i, j, 2, graph, sess))
                old_batch_norm.append(get_batch_norm(i, j, 2, graph, sess))

        new_params = []
        new_width = [
            16,
            int(16 * FLAGS.new_k),
            int(32 * FLAGS.new_k),
            int(64 * FLAGS.new_k)
        ]
        units = zip(old_kernels_to_cluster, old_kernels_to_add, old_batch_norm)
        for unit_index, (to_cluster, to_add, batch_norm) in enumerate(units):
            # Units were appended block by block above, so the owning block is
            # the unit index divided by the units per block (this used to be a
            # hard-coded 4, which is only right for num_residual_units == 4).
            cluster_num = new_width[unit_index // FLAGS.num_residual_units + 1]
            cluster_kernels, cluster_indices = cluster_kernel(
                to_cluster, cluster_num)
            add_kernels = add_kernel(to_add, cluster_indices, cluster_num)
            cluster_batchs_norm = cluster_batch_norm(batch_norm,
                                                     cluster_indices,
                                                     cluster_num)
            # Append order must match the order in which the matching
            # variables are visited in tf.global_variables() below.
            new_params.append(cluster_kernels)
            for p in range(BATCH_NORM_PARAM_NUM):
                new_params.append(cluster_batchs_norm[p])
            new_params.append(add_kernels)

        # save variables: (value, name) pairs for every global variable, using
        # the clustered value where the name matches the update pattern.
        init_params = []
        new_param_index = 0
        for var in tf.global_variables():
            update_match = UPDATE_PARAM_REGEX.match(var.name)
            skip_match = SKIP_PARAM_REGEX.match(var.name)
            if update_match and not skip_match:
                print("update {}".format(var.name))
                init_params.append((new_params[new_param_index], var.name))
                new_param_index += 1
            else:
                print("not update {}".format(var.name))
                var_vector = sess.run(var)
                init_params.append((var_vector, var.name))

        #close old graph
        sess.close()
    tf.reset_default_graph()

    # ---- Phase 2: build new graph and train it.
    with tf.Graph().as_default():
        init_step = 0
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get images and labels of CIFAR-100
        with tf.variable_scope('train_image'):
            train_images, train_labels = data_input.input_fn(FLAGS.data_dir,
                                                             FLAGS.batch_size,
                                                             train_mode=True)
        with tf.variable_scope('test_image'):
            test_images, test_labels = data_input.input_fn(FLAGS.data_dir,
                                                           FLAGS.batch_size,
                                                           train_mode=False)

        images = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, data_input.HEIGHT, data_input.WIDTH, 3])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        new_network = resnet.ResNet(hp, images, labels, global_step,
                                    init_params, FLAGS.new_k)
        new_network.build_model()
        new_network.build_train_op()

        train_summary_op = tf.summary.merge_all()

        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10000)
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print('\tRestore from %s' % ckpt.model_checkpoint_path)
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Checkpoints are saved as "model.ckpt-<step>" below, so the step
            # to resume from is the suffix after the last dash.
            init_step = int(
                os.path.basename(ckpt.model_checkpoint_path).split('-')[-1])
        else:
            print('No checkpoint file found. Start from the scratch.')
        sys.stdout.flush()
        # Start queue runners & summary_writer
        tf.train.start_queue_runners(sess=sess)
        if not os.path.exists(FLAGS.train_dir):
            # makedirs also creates missing parent directories.
            os.makedirs(FLAGS.train_dir)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
        # Training!
        test_best_acc = 0.0
        for step in range(init_step, FLAGS.max_steps):
            # Test
            if step % FLAGS.test_interval == 0:
                test_loss, test_acc = 0.0, 0.0
                for i in range(FLAGS.test_iter):
                    test_images_val, test_labels_val = sess.run(
                        [test_images, test_labels])
                    loss_value, acc_value = sess.run(
                        [new_network.loss, new_network.acc],
                        feed_dict={
                            new_network.is_train: False,
                            images: test_images_val,
                            labels: test_labels_val
                        })
                    test_loss += loss_value
                    test_acc += acc_value
                test_loss /= FLAGS.test_iter
                test_acc /= FLAGS.test_iter
                test_best_acc = max(test_best_acc, test_acc)
                format_str = ('%s: (Test)     step %d, loss=%.4f, acc=%.4f')
                print(format_str % (datetime.now(), step, test_loss, test_acc))
                sys.stdout.flush()
                test_summary = tf.Summary()
                test_summary.value.add(tag='test/loss', simple_value=test_loss)
                test_summary.value.add(tag='test/acc', simple_value=test_acc)
                test_summary.value.add(tag='test/best_acc',
                                       simple_value=test_best_acc)
                summary_writer.add_summary(test_summary, step)
                summary_writer.flush()
            # Train
            start_time = time.time()
            train_images_val, train_labels_val = sess.run(
                [train_images, train_labels])
            _, lr_value, loss_value, acc_value, train_summary_str = sess.run(
                [new_network.train_op, new_network.lr, new_network.loss,
                 new_network.acc, train_summary_op],
                feed_dict={
                    new_network.is_train: True,
                    images: train_images_val,
                    labels: train_labels_val
                })
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'loss became NaN'
            # Display & Summary(training)
            if step % FLAGS.display == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: (Training) step %d, loss=%.4f, acc=%.4f, lr=%f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str %
                      (datetime.now(), step, loss_value, acc_value, lr_value,
                       examples_per_sec, sec_per_batch))
                sys.stdout.flush()
                summary_writer.add_summary(train_summary_str, step)
            # Save the model checkpoint periodically.
            if (step > init_step and step % FLAGS.checkpoint_interval
                    == 0) or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

        # Flush pending summaries and release the session's resources.
        summary_writer.close()
        sess.close()
Exemplo n.º 29
0
def eval(root_image, root_model):
    """Evaluate a saved ResNet checkpoint on an image folder and print accuracies.

    The network is built as ``resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3])``
    (the ResNet-152 block layout), its weights are loaded from ``root_model``,
    and every image under ``root_image`` is resized to 32x32, normalised and
    classified. Per-class accuracy and overall accuracy are printed.

    Args:
        root_image: Root directory handed to ``torchvision.datasets.ImageFolder``.
        root_model: Path of the ``state_dict`` file passed to ``torch.load``.

    Returns:
        None. Results are only printed.
    """
    print("\nCurrent test image path ==> ", root_image)
    print("Current model ==> ", root_model)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Block counts per depth:
    #   resnet50  : [3, 4, 6, 3]
    #   resnet101 : [3, 4, 23, 3]
    #   resnet152 : [3, 8, 36, 3]
    model = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3])
    # map_location lets a checkpoint that was saved from a GPU be restored on
    # whichever device was selected above (including CPU-only hosts).
    model.load_state_dict(torch.load(root_model, map_location=device))
    model.to(device)

    transforms_test = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Test loader
    testset = torchvision.datasets.ImageFolder(root=root_image,
                                               transform=transforms_test)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=16,
                                              shuffle=False,
                                              num_workers=4)

    # Take the class names from the dataset itself: the integer labels yielded
    # by the loader are indices into exactly this list, so names and counters
    # cannot drift apart from the labels (no hard-coded directory or size).
    class_names = testset.classes
    class_correct = [0.0] * len(class_names)
    class_total = [0.0] * len(class_names)

    total_corr = 0.0
    total = 0.0

    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the highest score along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, 1)

            # Keep the mask 1-D. Squeezing it would collapse a one-sample
            # batch to a 0-d tensor that cannot be indexed per sample.
            hits = predicted == labels

            total += labels.size(0)
            total_corr += hits.sum().item()

            for label, hit in zip(labels.tolist(), hits.tolist()):
                class_correct[label] += hit
                class_total[label] += 1

    for name, n_correct, n_seen in zip(class_names, class_correct,
                                       class_total):
        # For euc-kr encoded folder names, decode with
        # unhexlify(name).decode('euc-kr') before printing.
        if n_seen:
            print('Accuracy of %s ==> %2d %%' %
                  (name, 100 * n_correct / n_seen))
        else:
            # A class without test samples has no defined accuracy.
            print('Accuracy of %s ==> n/a (no samples)' % name)

    print("Accuracy of the network ======> %2d %%" %
          (100 * total_corr / total))
Exemplo n.º 30
0
def train():
    """Run a ResNet over the ImageNet test input and pickle the results.

    Despite its name this routine executes no training op. It prints the
    configuration held in ``FLAGS``, builds the test input pipeline and the
    network, restores ``FLAGS.checkpoint`` when one is given, evaluates
    ``FLAGS.test_iter`` batches, and finally pickles the mean accuracy, the
    confusion matrix and the timing figures to ``FLAGS.output_file``.
    """
    # Start-up banner: (section title, ((format string, FLAGS attribute), ...)).
    # Each flag is looked up lazily, right before its line is printed.
    banner = (
        ('[Dataset Configuration]', (
            ('\tImageNet test root: %s', 'test_image_root'),
            ('\tImageNet test list: %s', 'test_dataset'),
            ('\tNumber of classes: %d', 'num_classes'),
            ('\tNumber of test images: %d', 'num_test_instance'),
        )),
        ('[Network Configuration]', (
            ('\tBatch size: %d', 'batch_size'),
            ('\tCheckpoint file: %s', 'checkpoint'),
        )),
        ('[Optimization Configuration]', (
            ('\tL2 loss weight: %f', 'l2_weight'),
            ('\tThe momentum optimizer: %f', 'momentum'),
            ('\tInitial learning rate: %f', 'initial_lr'),
            ('\tEpochs per lr step: %s', 'lr_step_epoch'),
            ('\tLearning rate decay: %f', 'lr_decay'),
        )),
        ('[Evaluation Configuration]', (
            ('\tOutput file path: %s', 'output_file'),
            ('\tTest iterations: %d', 'test_iter'),
            ('\tSteps per displaying info: %d', 'display'),
            ('\tGPU memory fraction: %f', 'gpu_fraction'),
            ('\tLog device placement: %d', 'log_device_placement'),
        )),
    )
    for section_title, entries in banner:
        print(section_title)
        for template, flag_name in entries:
            print(template % getattr(FLAGS, flag_name))

    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Test input pipeline, pinned to the CPU.
        print('Load ImageNet dataset')
        with tf.device('/cpu:0'):
            print('\tLoading test data from %s' % FLAGS.test_dataset)
            with tf.variable_scope('test_image'):
                batch_images_op, batch_labels_op = data_input.inputs(
                    FLAGS.test_image_root,
                    FLAGS.test_dataset,
                    FLAGS.batch_size,
                    False,
                    num_threads=1,
                    center_crop=True)

        # Placeholders through which each fetched batch is fed to the network.
        image_shape = [
            FLAGS.batch_size,
            data_input.IMAGE_HEIGHT,
            data_input.IMAGE_WIDTH,
            3,
        ]
        images = tf.placeholder(tf.float32, image_shape)
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        # Only the hyper-parameter object is created inside the GPU device
        # scope; the network itself is constructed after the scope is left.
        with tf.device('/GPU:0'):
            hp = resnet.HParams(batch_size=FLAGS.batch_size,
                                num_classes=FLAGS.num_classes,
                                weight_decay=FLAGS.l2_weight,
                                momentum=FLAGS.momentum,
                                finetune=FLAGS.finetune)
        network = resnet.ResNet(hp, images, labels, global_step)
        network.build_model()
        print('\tNumber of Weights: %d' % network._weights)
        print('\tFLOPs: %d' % network._flops)

        # Variable initialiser; created before the session and the saver.
        init_op = tf.initialize_all_variables()

        # Session with a capped GPU memory share and soft device placement.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_fraction)
        session_config = tf.ConfigProto(
            gpu_options=gpu_options,
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        # (A disabled tfdbg LocalCLIDebugWrapperSession snippet used to sit
        # here as a bare string literal; it had no runtime effect.)

        sess.run(init_op)

        # Saver over all variables; restore only when a checkpoint was given.
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000)
        if FLAGS.checkpoint is None:
            print(
                'No checkpoint file of basemodel found. Start from the scratch.'
            )
        else:
            saver.restore(sess, FLAGS.checkpoint)
            print('Load checkpoint %s' % FLAGS.checkpoint)

        # Launch the queue runners that feed the input pipeline.
        tf.train.start_queue_runners(sess=sess)

        # Evaluation loop: accumulate loss, accuracy, time and confusions.
        loss_total = 0.0
        acc_total = 0.0
        test_time = 0.0
        confusion_matrix = np.zeros((FLAGS.num_classes, FLAGS.num_classes),
                                    dtype=np.int32)
        progress_fmt = (
            '%s: iter %d, loss=%.4f, acc=%.4f (%.1f examples/sec; %.3f sec/batch)'
        )
        for batch_idx in range(FLAGS.test_iter):
            batch_images, batch_labels = sess.run(
                [batch_images_op, batch_labels_op])

            # Only the forward pass is timed, not the batch fetch above.
            tick = time.time()
            loss_value, acc_value, pred_value = sess.run(
                [network.loss, network.acc, network.preds],
                feed_dict={
                    network.is_train: False,
                    images: batch_images,
                    labels: batch_labels,
                })
            duration = time.time() - tick

            loss_total += loss_value
            acc_total += acc_value
            test_time += duration
            # Rows are indexed by the true label, columns by the prediction.
            for truth, guess in zip(batch_labels, pred_value):
                confusion_matrix[truth, guess] += 1

            if i_is_quiet(batch_idx) if False else batch_idx % FLAGS.display != 0:
                continue
            examples_per_sec = FLAGS.batch_size / duration
            print(progress_fmt % (datetime.now(), batch_idx, loss_value,
                                  acc_value, examples_per_sec,
                                  float(duration)))

        # Averages over all evaluated batches (the mean loss is not reported).
        mean_loss = loss_total / FLAGS.test_iter
        mean_acc = acc_total / FLAGS.test_iter

        # Print and save results
        sec_per_image = test_time / FLAGS.test_iter / FLAGS.batch_size
        print('Done! Acc: %.6f, Test time: %.3f sec, %.7f sec/example' %
              (mean_acc, test_time, sec_per_image))
        print('Saving result... ')
        result = {
            'accuracy': mean_acc,
            'confusion_matrix': confusion_matrix,
            'test_time': test_time,
            'sec_per_image': sec_per_image,
        }
        with open(FLAGS.output_file, 'wb') as out_file:
            pickle.dump(result, out_file)
        print('done!')