Example #1
    def train_steps(self, global_step):
        # Variables that affect learning rate.
        decay_steps = int(self.opt_params.NUM_STEPS_PER_DECAY)
        learning_rate_decay_factor = self.opt_params.LEARNING_RATE_DECAY_FACTOR
        moving_average_decay = self.opt_params.MOVING_AVERAGE_DECAY
        self.gen_loss, self.disc_loss = self.get_losses()

        # Compute gradients.
        with tf.control_dependencies([self.gen_loss, self.disc_loss]):
            gen_train_step_op = ops.train(
                    self.gen_loss, 
                    global_step, 
                    decay_steps, 
                    self.opt_params.GEN_INITIAL_LEARNING_RATE,
                    learning_rate_decay_factor, 
                    moving_average_decay, 
                    target_vars=self.gen_vars, 
                    name='gen')

            disc_train_step_op = ops.train(
                    self.disc_loss, 
                    global_step, 
                    decay_steps, 
                    self.opt_params.DISC_INITIAL_LEARNING_RATE,
                    learning_rate_decay_factor, 
                    moving_average_decay, 
                    target_vars=self.disc_vars, 
                    name='disc')

            return [gen_train_step_op, disc_train_step_op]
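
The method returns the generator and discriminator train ops as a pair. A minimal, hedged sketch of driving them in a TF1 session (the model, global_step, and num_steps names are illustrative assumptions, not part of the example above):

gen_step, disc_step = model.train_steps(global_step)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(num_steps):
        # one generator update and one discriminator update per iteration
        sess.run([gen_step, disc_step])
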
    def __init__(self,
                 lr=0.0001,
                 optimizer=tf.train.Optimizer,
                 fine_tuning=True,
                 dropout=False,
                 adaptive_ratio=1.0):
        '''
        ---------- Hyperparameters ----------
        :param fine_tuning: If True, the parameters of the CNN layers are also fine-tuned.
                            Otherwise, only the parameters of the FC layers are trained.
        :param dropout: If True, dropout is applied to all fully connected layers except the last one.
                        In that case dropout_keep_prob should also be fed (its default value is 1.0).
        :param adaptive_ratio: If smaller than 1.0, the convolutional layers are trained with
                               learning rate lr * adaptive_ratio while the FC layers use lr.
        :return:
        '''
        self.desc = "Learning rate : {}, optimizer : {}, fine_tuning : {}, dropout : {}, adaptive ratio : {}"\
            .format(lr, optimizer.__name__, fine_tuning, dropout, adaptive_ratio)
        print(self.desc)
        self.params = {
            'lr': lr,
            'optimizer': optimizer,
            'fine_tuning': fine_tuning,
            'dropout': dropout,
            'adaptive_ratio': adaptive_ratio
        }
        self.xs = tf.placeholder(tf.float32, [None, 32, 32, 3])
        self.ys = tf.placeholder(tf.int32, [None])
        self.dropout_keep_prob = tf.placeholder_with_default(1.0, None)

        pool5 = self.build_convnet(fine_tuning)
        fc3 = self.build_fcnet(pool5, dropout)
        self.probs = tf.nn.softmax(fc3, name='softmax')

        self.loss = ops.loss(logits=self.probs, labels=self.ys, one_hot=False)
        self.accuracy = ops.accuracy(logits=self.probs,
                                     labels=self.ys,
                                     one_hot=False)
        if adaptive_ratio < 1.0:
            self.train = ops.train(self.loss,
                                   optimizer=optimizer,
                                   conv_lr=lr * adaptive_ratio,
                                   fc_lr=lr)
        else:
            self.train = optimizer(learning_rate=lr).minimize(self.loss)
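
A hedged usage sketch of the constructor above; the class name Classifier and the numpy batches are illustrative assumptions, while the keyword arguments and attribute names come from the code itself:

model = Classifier(lr=0.0001,
                   optimizer=tf.train.AdamOptimizer,
                   fine_tuning=True,
                   dropout=True,        # dropout_keep_prob must then be fed
                   adaptive_ratio=0.1)  # conv layers train with lr * 0.1
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, batch_loss = sess.run(
        [model.train, model.loss],
        feed_dict={model.xs: batch_images,    # assumed array of shape [N, 32, 32, 3]
                   model.ys: batch_labels,    # assumed int labels of shape [N]
                   model.dropout_keep_prob: 0.5})
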
    cwd = os.getcwd()
    os.chdir(os.path.join(path.test, 'images'))
    # cmd = "ffmpeg -r 30 -f image2 -s 256x256 -i pic_%d-outputs.png -vcodec libx264 -crf 25  -pix_fmt yuv420p ../out.mp4"
    cmd = 'ffmpeg -r 30 -i pic_%d-outputs.png -c:v libx264 -crf 15 -vf "fps=30,format=yuv420p" ../../out.mp4'
    os.system(cmd)
    os.chdir(cwd)
elif args.cmd == 'prep':
    ops.clean_filenames(path.rawA, rename='pic_%s')
    ops.crop_square_resize(path.rawA, path.A, args.size, args.size)
    ops.crop_square_resize(path.A, path.B, args.size // 8, args.size // 8,
                           args.size, args.size)
    ops.combine(path.A, path.B, path.train, args.size)
elif args.cmd == 'escalate':
    escalate()
elif args.cmd == 'train':
    ops.train(path.model, path.train, args.epochs, args.size)
elif args.cmd == 'init_remote':
    # create dirs and git clone repo
    os.system('ssh %s "mkdir git; cd git; git clone %s; mkdir %s/projects"' %
              (vm.GPU_INSTANCE, path.GIT_REPO_URL, path.GIT_REPO_NAME))
    # install packages
    os.system(
        'ssh %s "sudo apt-get install -y ffmpeg python-imaging python3-pil"' %
        (vm.GPU_INSTANCE))
elif args.cmd == 'train_remote':
    """Run on GPU_INSTANCE via ssh and tmux.
    To keep a process running I use:
        tmux -d python script.py
    Manually running tmux first works too. Detaching is done with [ctrl]-[b], [d].
    And a running tmux session can be reatached with: tmux attach
    """
Example #4
 def train(self, global_step):
     with tf.control_dependencies([self.loss]):
         return ops.train(self.loss,
                          global_step,
                          learning_rate=self.lr,
                          name='training_step')
Example #5
def main():
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch Clothing1M')
    parser.add_argument('--batch_size', type=int, default=256, help='input batch size for training (default: 256)')
    parser.add_argument('--test_batch_size', type=int, default=256, help='input batch size for testing (default: 256)')
    parser.add_argument('--epochs', type=int, default=10, help='number of epochs to train (default: 10)')
    parser.add_argument('--gpu_id', type=int, default=0, help='index of gpu to use (default: 0)')
    parser.add_argument('--lr', type=float, default=0.001, help='init learning rate (default: 0.001)')
    parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)')
    parser.add_argument('--save', action='store_true', default=False, help='For saving softmax_out_avg')
    parser.add_argument('--SEAL', type=int, default=0, help='Phase of self-evolution')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device('cuda:'+str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Datasets
    root = './data/Clothing1M'
    num_classes = 14
    kwargs = {'num_workers': 32, 'pin_memory': True} if torch.cuda.is_available() else {}
    transform_train = transforms.Compose([transforms.Resize((256, 256)),
                                          transforms.RandomCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    transform_test = transforms.Compose([transforms.Resize((224, 224)),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
 
    train_dataset = Clothing1M(root, mode='train', transform=transform_train)
    val_dataset = Clothing1M(root, mode='val', transform=transform_test)
    test_dataset = Clothing1M(root, mode='test', transform=transform_test)
    
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs)
    softmax_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.test_batch_size, shuffle=False, **kwargs)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.test_batch_size, shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=False, **kwargs)


    def learning_rate(lr_init, epoch):
        optim_factor = 0
        if epoch > 5:
            optim_factor = 1
        return lr_init*math.pow(0.1, optim_factor)
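    # Worked example (not part of the original code): the schedule keeps lr_init
    # through epoch 5 and divides it by 10 from epoch 6 on, e.g.
    # learning_rate(0.001, 3) -> 0.001 and learning_rate(0.001, 8) -> 0.0001.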

    def load_pretrain(num_classes, device):
        model_pre = resnet50(num_classes=1000, pretrained=True) # imagenet pretrained, numclasses=1000
        if num_classes==1000:
            return model_pre.to(device)

        else:
            model = resnet50(num_classes=num_classes, pretrained=False)
            params_pre = model_pre.state_dict().copy()
            params = model.state_dict()
            # Copy every ImageNet-pretrained weight except the final fc layer,
            # which stays randomly initialized with num_classes outputs.
            for i in params_pre:
                if not i.startswith('fc'):
                    params[i] = params_pre[i]
            model.load_state_dict(params)
            return model.to(device)

    # results
    results_root = os.path.join('results', 'clothing')
    if not os.path.isdir(results_root):
        os.makedirs(results_root)

    """ Test model """
    if args.SEAL==-1:
        model = resnet50(num_classes=num_classes).to(device)
        model.load_state_dict(torch.load(os.path.join(results_root, 'seed0_clothing_normal.pt')))
        test(args, model, device, test_loader)


    """ Get softmax_out_avg - normal training on noisy labels """
    if args.SEAL==0:
        # Building model
        model = load_pretrain(num_classes, device)
        model = torch.nn.DataParallel(model, device_ids=[0,1,2,3])

        # Training
        best_val_acc = 0
        save_path = os.path.join(results_root, 'seed'+str(args.seed)+'_clothing_normal.pt')
        softmax_out = []
        for epoch in range(1, args.epochs + 1):
            optimizer = optim.SGD(model.parameters(), lr=learning_rate(args.lr, epoch), momentum=0.9, weight_decay=1e-3)
            train(args, model, device, train_loader, optimizer, epoch)
            best_val_acc = val_test(args, model, device, val_loader, test_loader, best_val_acc, save_path)
            softmax_out.append(get_softmax_out(model, softmax_loader, device))

        if args.save:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_normal.npy')
            softmax_out = np.concatenate(softmax_out)
            np.save(softmax_root, softmax_out)
            print('new softmax_out saved to', softmax_root, ', shape: ', softmax_out.shape)


    """ Self Evolution - training on softmax_out_avg """
    if args.SEAL>=1:
        if args.SEAL==1:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_normal.npy')
            model_path = os.path.join(results_root, 'seed'+str(args.seed)+'_clothing_normal.pt')
        else:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_SEAL'+str(args.SEAL-1)+'.npy')
            model_path = os.path.join(results_root, 'seed'+str(args.seed)+'_clothing_SEAL'+str(args.SEAL-1)+'.pt')

        save_path = os.path.join(results_root, 'seed'+str(args.seed)+'_clothing_SEAL'+str(args.SEAL)+'.pt')   
            
        # Loading softmax_out_avg of last phase: the saved array stacks one
        # softmax output per training epoch, so reshape it to
        # [epochs, num_samples, num_classes] and average over the epoch axis.
        softmax_out_avg = np.load(softmax_root).reshape([-1, len(train_dataset), num_classes])
        softmax_out_avg = softmax_out_avg.mean(axis=0)
        print('softmax_out_avg loaded from', softmax_root, ', shape: ', softmax_out_avg.shape)

        # Dataset with soft targets
        train_dataset_soft = Clothing1M_soft(root, targets_soft=torch.Tensor(softmax_out_avg.copy()), mode='train', transform=transform_train)
        train_loader_soft = torch.utils.data.DataLoader(train_dataset_soft, batch_size=args.batch_size, shuffle=True, **kwargs)

        # Building model
        model = load_pretrain(num_classes, device)
        model = torch.nn.DataParallel(model, device_ids=[0,1,2,3])
        model.load_state_dict(torch.load(model_path))
        print('Initialize the model using {}.'.format(model_path))

        # Training
        best_val_acc = 0
        softmax_out = []
        for epoch in range(1, args.epochs + 1):
            optimizer = optim.SGD(model.parameters(), lr=learning_rate(args.lr, epoch), momentum=0.9, weight_decay=1e-3)
            train_soft(args, model, device, train_loader_soft, optimizer, epoch)
            best_val_acc = val_test(args, model, device, val_loader, test_loader, best_val_acc, save_path)
            softmax_out.append(get_softmax_out(model, softmax_loader, device))

        if args.save:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_SEAL'+str(args.SEAL)+'.npy')
            softmax_out = np.concatenate(softmax_out)
            np.save(softmax_root, softmax_out)
            print('new softmax_out saved to', softmax_root, ', shape: ', softmax_out.shape)
Example #6
def train(args, data, params):
    train = data['train']
    valid = data['valid']
    learning_rate = args.learning_rate

    with tf.Graph().as_default():
        input_ph = tf.placeholder(tf.int32,
                                  shape=[args.batch_size, params['gram_size']])
        targ_ph = tf.placeholder(tf.int32, shape=[args.batch_size])
        learning_rate_ph = tf.placeholder(tf.float32, shape=[])

        if args.w2v:
            with h5py.File(args.w2v, 'r') as datafile:
                embeds = datafile['w2v'][:]
            scores, normalize_op, vars = ops.model(input_ph, params, embeds)
        else:
            scores, normalize_op, vars = ops.model(input_ph, params)

        loss = ops.loss(scores, targ_ph)
        train_op, print_op = ops.train(loss, learning_rate_ph, args)

        #sess = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=NUM_THREADS,\
        #		intra_op_parallelism_threads=NUM_THREADS))
        sess = tf.Session()
        init = tf.initialize_all_variables(
        )  # initialize variables before they can be used
        saver = tf.train.Saver()
        sess.run(init)
        if args.modelfile:
            saver.restore(sess, args.modelfile)
            print "Model restored from %s" % args.modelfile

        valid_loss = 0.
        for i in xrange(valid.nbatches):
            valid_feed_dict = get_feed_dict(valid, i, input_ph, targ_ph,
                                            learning_rate_ph)
            batch_loss = sess.run([loss], feed_dict=valid_feed_dict)[0]
            valid_loss += batch_loss
        last_valid = valid_loss
        print 'Initial valid perplexity: %.3f' % math.exp(
            valid_loss / valid.nbatches)

        for epoch in xrange(args.nepochs):
            print "Training epoch %d with learning rate %.3f" % (epoch + 1,
                                                                 learning_rate)
            vals = sess.run(vars)
            start_time = time.time()
            train_loss = 0.
            valid_loss = 0.

            for i in xrange(train.nbatches):
                train_feed_dict = get_feed_dict(train, i, input_ph, targ_ph, \
                    learning_rate_ph, learning_rate)
                #grads = sess.run(print_op, feed_dict=train_feed_dict)
                _, batch_loss = sess.run([train_op, loss],
                                         feed_dict=train_feed_dict)
                train_loss += batch_loss

            for i in xrange(valid.nbatches):
                valid_feed_dict = get_feed_dict(valid, i, input_ph, targ_ph,
                                                learning_rate_ph)
                batch_loss = sess.run([loss], feed_dict=valid_feed_dict)[0]
                valid_loss += batch_loss

            if args.normalize:
                _ = sess.run(normalize_op)

            duration = time.time() - start_time
            print "\tloss = %.3f, valid ppl = %.3f, %.3f s" % \
                (math.exp(train_loss/train.nbatches), \
                    math.exp(valid_loss/valid.nbatches), duration)
            # Halve the learning rate when validation loss stops improving;
            # otherwise checkpoint the model. After args.decay_after epochs the
            # rate is additionally decayed by a factor of 1.2 every epoch.
            if last_valid < valid_loss:
                learning_rate /= 2.
            elif args.outfile:
                saver.save(sess, args.outfile)
            if epoch >= args.decay_after:
                learning_rate /= 1.2
            last_valid = valid_loss

        return sess.run([normalize_op
                         ])[0]  # return final normalized embeddings
one_hot = 'no'
batch_size = 128

train_loss = []
train_acc = []
valid_acc = []
valid_loss = []
ini_train_loss = 1000000
ini_valid_loss = 1000000
s = timeit.default_timer()

for i in tf.range(iteration):
  rand_index = np.random.choice(len(trainX), size=batch_size, replace=False)
  rand_x = tf.convert_to_tensor(trainX[rand_index], dtype=tf.float32)
  rand_y = tf.convert_to_tensor(trainY[rand_index], dtype=tf.int32) # label
  temp_train_loss = ops.train(model, rand_x, rand_y, optimizer)
  
  if (i+1) % 20 == 0:
    # Record and print results
    batch_predictions = ops.predict(model, rand_x)
    valid_index = np.random.choice(len(trainValY), size=batch_size)
    valid_x = tf.convert_to_tensor(trainValX[valid_index], dtype=tf.float32)
    valid_y = tf.convert_to_tensor(trainValY[valid_index], dtype=tf.int32)
    valid_predictions = ops.predict(model, valid_x)
  
    batch_predictions = tf.argmax(batch_predictions, axis=1, output_type=tf.int32)
    temp_train_acc = tf.reduce_mean(tf.cast(tf.equal(batch_predictions, rand_y), tf.float32))
  
    valid_predictions = tf.argmax(valid_predictions, axis=1, output_type=tf.int32)
    temp_valid_acc = tf.reduce_mean(tf.cast(tf.equal(valid_predictions, valid_y), tf.float32))
      
Example #8
def main():
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST')
    parser.add_argument('--batch_size', type=int, default=64, help='input batch size for training (default: 64)')
    parser.add_argument('--test_batch_size', type=int, default=1000, help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=50, help='number of epochs to train (default: 50)')
    parser.add_argument('--gpu_id', type=int, default=0, help='index of gpu to use (default: 0)')
    parser.add_argument('--lr', type=float, default=0.01, help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, help='SGD momentum (default: 0.5)')
    parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)')
    parser.add_argument('--noise_pattern', type=str, default='dependent', help='Noise pattern (default: dependent)')
    parser.add_argument('--noise_rate', type=float, default=0.0, help='Noise rate (default: 0.0)')
    parser.add_argument('--save', action='store_true', default=False, help='For saving softmax_out_avg')
    parser.add_argument('--SEAL', type=int, default=0, help='Phase of self-evolution')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device('cuda:'+str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Datasets
    root = './data'
    kwargs = {'num_workers': 4, 'pin_memory': True} if torch.cuda.is_available() else {}
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) #0.1307, 0.3081 are the mean and std of mnist
    train_dataset = datasets.MNIST(root, train=True, download=True, transform=transform)
    train_dataset_noisy = datasets.MNIST(root, train=True, transform=transform)
    test_dataset = datasets.MNIST(root, train=False, transform=transform)

    targets_noisy = torch.Tensor(pd.read_csv(os.path.join('./data/MNIST/label_noisy', args.noise_pattern+str(args.noise_rate)+'.csv'))['label_noisy'].values.astype(int))
    train_dataset_noisy.targets = targets_noisy
    
    train_loader = torch.utils.data.DataLoader(train_dataset_noisy, batch_size=args.batch_size, shuffle=True, **kwargs)
    softmax_loader = torch.utils.data.DataLoader(train_dataset_noisy, batch_size=args.test_batch_size, shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=False, **kwargs)

    # results
    results_root = os.path.join('results', 'mnist_'+args.noise_pattern+str(args.noise_rate))
    if not os.path.isdir(results_root):
        os.makedirs(results_root)

    """ Get softmax_out_avg - normal training on noisy labels """
    if args.SEAL==0:
        # Building model
        model = MNIST_CNN().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

        # Training
        softmax_out_avg = np.zeros([len(train_dataset_noisy), 10])
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        if args.save:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_avg_'+args.noise_pattern+str(args.noise_rate)+'_normal.npy')
            np.save(softmax_root, softmax_out_avg)
            print('new softmax_out_avg saved to', softmax_root, ', shape: ', softmax_out_avg.shape)

    """ Self Evolution - training on softmax_out_avg """
    if args.SEAL>=1:
        # Loading softmax_out_avg of last phase
        if args.SEAL==1:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_avg_'+args.noise_pattern+str(args.noise_rate)+'_normal.npy')
        else:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_avg_'+args.noise_pattern+str(args.noise_rate)+'_SEAL'+str(args.SEAL-1)+'.npy')
        softmax_out_avg = np.load(softmax_root)
        print('softmax_out_avg loaded from', softmax_root, ', shape: ', softmax_out_avg.shape)
       
        # Dataset with soft targets
        train_dataset_soft = MNIST_soft(root, targets_soft=torch.Tensor(softmax_out_avg.copy()), train=True, transform=transform)
        train_dataset_soft.targets = targets_noisy
        train_loader_soft = torch.utils.data.DataLoader(train_dataset_soft, batch_size=args.batch_size, shuffle=True, **kwargs)

        # Building model
        model = MNIST_CNN().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

        # Training 
        softmax_out_avg = np.zeros([len(train_dataset_noisy), 10])
        for epoch in range(1, args.epochs + 1):
            train_soft(args, model, device, train_loader_soft, optimizer, epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        if args.save:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_avg_'+args.noise_pattern+str(args.noise_rate)+'_SEAL'+str(args.SEAL)+'.npy')
            np.save(softmax_root, softmax_out_avg)
            print('new softmax_out_avg saved to', softmax_root, ', shape: ', softmax_out_avg.shape)
Example #9
def main():
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch cifar10')
    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test_batch_size',
                        type=int,
                        default=1000,
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=150,
                        help='number of epochs to train (default: 150)')
    parser.add_argument('--gpu_id',
                        type=int,
                        default=0,
                        help='index of gpu to use (default: 0)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        help='init learning rate (default: 0.1)')
    parser.add_argument('--dp',
                        type=float,
                        default=0.0,
                        help='dropout rate (default: 0.0)')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='random seed (default: 0)')
    parser.add_argument('--noise_pattern',
                        type=str,
                        default='dependent',
                        help='Noise pattern (default: dependent)')
    parser.add_argument('--noise_rate',
                        type=float,
                        default=0.0,
                        help='Noise rate (default: 0.0)')
    parser.add_argument('--save',
                        action='store_true',
                        default=False,
                        help='For saving softmax_out_avg')
    parser.add_argument('--SEAL',
                        type=int,
                        default=0,
                        help='Phase of self-evolution')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device(
        'cuda:' + str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Datasets
    root = './data/CIFAR10'
    num_classes = 10
    kwargs = {
        'num_workers': 4,
        'pin_memory': True
    } if torch.cuda.is_available() else {}
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010))
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010))
    ])

    train_dataset = datasets.CIFAR10(root,
                                     train=True,
                                     download=True,
                                     transform=transform_train)
    train_dataset_noisy = datasets.CIFAR10(root,
                                           train=True,
                                           transform=transform_train)
    test_dataset = datasets.CIFAR10(root,
                                    train=False,
                                    transform=transform_test)

    targets_noisy = list(
        pd.read_csv(
            os.path.join('./data/CIFAR10/label_noisy',
                         args.noise_pattern + str(args.noise_rate) +
                         '.csv'))['label_noisy'].values.astype(int))
    train_dataset_noisy.targets = targets_noisy

    train_loader = torch.utils.data.DataLoader(train_dataset_noisy,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    softmax_loader = torch.utils.data.DataLoader(
        train_dataset_noisy,
        batch_size=args.test_batch_size,
        shuffle=False,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              **kwargs)

    def learning_rate(lr_init, epoch):
        optim_factor = 0
        if (epoch > 120):
            optim_factor = 2
        elif (epoch > 60):
            optim_factor = 1
        return lr_init * math.pow(0.2, optim_factor)
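    # Worked example (not part of the original code): with lr_init = 0.1 this
    # gives 0.1 for epochs 1-60, 0.02 for epochs 61-120, and 0.004 from
    # epoch 121 on (a factor of 0.2 applied at each step).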

    # results
    results_root = os.path.join(
        'results', 'cifar10_' + args.noise_pattern + str(args.noise_rate))
    if not os.path.isdir(results_root):
        os.makedirs(results_root)
    """ Get softmax_out_avg - normal training on noisy labels """
    if args.SEAL == 0:
        # Building model
        model = Wide_ResNet(depth=28,
                            widen_factor=10,
                            dropout_rate=args.dp,
                            num_classes=num_classes).to(device)

        # Training
        softmax_out_avg = np.zeros([len(train_dataset_noisy), num_classes])
        for epoch in range(1, args.epochs + 1):
            optimizer = optim.SGD(model.parameters(),
                                  lr=learning_rate(args.lr, epoch),
                                  momentum=0.9,
                                  weight_decay=5e-4)
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        if args.save:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) + '_softmax_out_avg_' +
                args.noise_pattern + str(args.noise_rate) + '_normal.npy')
            np.save(softmax_root, softmax_out_avg)
            print('new softmax_out_avg saved to', softmax_root, ', shape: ',
                  softmax_out_avg.shape)
    """ Self Evolution - training on softmax_out_avg """
    if args.SEAL >= 1:
        # Loading softmax_out_avg of last phase
        if args.SEAL == 1:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) + '_softmax_out_avg_' +
                args.noise_pattern + str(args.noise_rate) + '_normal.npy')
        else:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) + '_softmax_out_avg_' +
                args.noise_pattern + str(args.noise_rate) + '_SEAL' +
                str(args.SEAL - 1) + '.npy')
        softmax_out_avg = np.load(softmax_root)
        print('softmax_out_avg loaded from', softmax_root, ', shape: ',
              softmax_out_avg.shape)

        # Dataset with soft targets
        train_dataset_soft = CIFAR10_soft(root,
                                          targets_soft=torch.Tensor(
                                              softmax_out_avg.copy()),
                                          train=True,
                                          transform=transform_train)
        train_dataset_soft.targets = targets_noisy
        train_loader_soft = torch.utils.data.DataLoader(
            train_dataset_soft,
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs)

        # Building model
        model = Wide_ResNet(depth=28,
                            widen_factor=10,
                            dropout_rate=args.dp,
                            num_classes=num_classes).to(device)

        # Training
        softmax_out_avg = np.zeros([len(train_dataset_noisy), num_classes])
        for epoch in range(1, args.epochs + 1):
            optimizer = optim.SGD(model.parameters(),
                                  lr=learning_rate(args.lr, epoch),
                                  momentum=0.9,
                                  weight_decay=5e-4)
            train_soft(args, model, device, train_loader_soft, optimizer,
                       epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        if args.save:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) + '_softmax_out_avg_' +
                args.noise_pattern + str(args.noise_rate) + '_SEAL' +
                str(args.SEAL) + '.npy')
            np.save(softmax_root, softmax_out_avg)
            print('new softmax_out_avg saved to', softmax_root, ', shape: ',
                  softmax_out_avg.shape)
    with open(os.path.join(args.save, "loss.json"), "w") as outfile:
        json.dump([train, val], outfile)


for epoch in range(args.startepoch + 1, args.epochs + 1):
    training_loss = 0
    iter_since = time.time()
    try:
        for index, dialog in enumerate(training_data):
            if args.ss:
                teacher_forcing_ratio = (teacher_lazy_period - epoch +
                                         1) / teacher_lazy_period
                if teacher_forcing_ratio < 0.5:
                    teacher_forcing_ratio = 0.5
            training_loss += train(my_lang, criterion, teacher_forcing_ratio,\
                    dialog, encoder, context, decoder, \
                    encoder_optimizer, context_optimizer, decoder_optimizer)
            if (index) % 500 == 0:
                print("    @ Iter [", index + 1, "/", len(training_data),"] | avg. loss: ", training_loss / (index + 1), \
                        " | perplexity: ", math.exp(training_loss / (index + 1))," | usage ", time.time() - iter_since, " seconds | teacher_force: ", \
                        teacher_forcing_ratio)
                sample(my_lang, dialog, encoder, context, decoder)
                iter_since = time.time()
            if (index + 1) % 2000 == 0:
                val_since = time.time()
                validation_score_100 = validate(my_lang, criterion, teacher_forcing_ratio, \
                        validation_data[:100], encoder, context, decoder, \
                        encoder_optimizer, context_optimizer, decoder_optimizer)
                print("    @ Val. [", index + 1, "/", len(training_data),"] | avg. val. loss: ", validation_score_100, \
                        " | perplexity: ", math.exp(validation_score_100)," | usage ", time.time() - val_since, " seconds")
                print("    % Best validation score: ",
Example #11
def main():
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test_batch_size',
                        type=int,
                        default=1000,
                        help='input batch size for testing (default: 1000)')
    parser.add_argument(
        '--epochs',
        type=int,
        default=20,
        help='number of epochs to train (default: 20)'
    )  # On clean data, 20 is sufficiently large to achieve 100% training accuracy.
    parser.add_argument('--gpu_id',
                        type=int,
                        default=0,
                        help='index of gpu to use (default: 0)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='random seed (default: 0)')
    parser.add_argument('--noise_rate',
                        type=float,
                        default=0.0,
                        help='Noise rate (default: 0.0)')
    parser.add_argument('--load',
                        action='store_true',
                        default=False,
                        help='Load existing averaged softmax')
    parser.add_argument('--gen',
                        action='store_true',
                        default=False,
                        help='Generate noisy labels')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device(
        'cuda:' + str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Datasets
    root = './data'
    kwargs = {
        'num_workers': 4,
        'pin_memory': True
    } if torch.cuda.is_available() else {}
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])  #0.1307, 0.3081 are the mean and std of mnist
    train_dataset = datasets.MNIST(root,
                                   train=True,
                                   download=True,
                                   transform=transform)
    test_dataset = datasets.MNIST(root, train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              **kwargs)

    if args.load:
        softmax_out_avg = np.load('data/MNIST/label_noisy/softmax_out_avg.npy')
        print('softmax_out_avg loaded, shape: ', softmax_out_avg.shape)

    else:
        # Building model
        model = MNIST_CNN().to(device)
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum)

        # Training
        softmax_out_avg = np.zeros([len(train_dataset), 10])
        softmax_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.test_batch_size,
            shuffle=False,
            **kwargs)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        np.save('data/MNIST/label_noisy/softmax_out_avg.npy', softmax_out_avg)

    if args.gen:
        print('Generating noisy labels according to softmax_out_avg...')
        label = np.array(train_dataset.targets)
        label_noisy_cand, label_noisy_prob = [], []
        for i in range(len(label)):
            # For each sample the candidate noisy label is the most likely
            # class other than the true one, recorded together with its
            # averaged softmax probability.
            pred = softmax_out_avg[i, :].copy()
            pred[label[i]] = -1
            label_noisy_cand.append(np.argmax(pred))
            label_noisy_prob.append(np.max(pred))

        # Flip the labels of the noise_rate fraction of samples whose candidate
        # class received the highest averaged probability.
        label_noisy = label.copy()
        num_noisy = int(args.noise_rate * len(label))
        if num_noisy > 0:  # guard: a '[-0:]' slice would otherwise select every sample
            index = np.argsort(label_noisy_prob)[-num_noisy:]
            label_noisy[index] = np.array(label_noisy_cand)[index]

        save_pth = os.path.join('./data/MNIST/label_noisy',
                                'dependent' + str(args.noise_rate) + '.csv')
        pd.DataFrame.from_dict({
            'label': label,
            'label_noisy': label_noisy
        }).to_csv(save_pth, index=False)
        print('Noisy label data saved to ', save_pth)