Example #1
def main():
    X_tr, y_tr, X_te, y_te = load_data()
    X_tr, y_tr = X_tr[:1024], y_tr[:1024]
    X_te, y_te = X_te[:128], y_te[:128]
    if args.model == 'cnn':
        model = ConvNet()
        model_save_path = config.CNN_MODEL_PATH
    else:
        model = CapsuleNet()
        model_save_path = config.CAPSULE_MODEL_PATH

    model.to(device)
    optimizer = Adam(model.parameters())
    train_loss = []
    train_accuracy = []
    best_acc = 0.0
    for epoch in range(10):
        print(("Epoch %d " + "-" * 70) % (epoch + 1))
        loss = train(model, optimizer, X_tr, y_tr)
        train_loss.append(loss)
        acc = test(model, X_tr, y_tr, "Train")
        train_accuracy.append(acc)
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), model_save_path)
    with open('result/' + args.model + '_train.p', 'wb') as f:
        pickle.dump((train_loss, train_accuracy), f)
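
A minimal sketch of reading the pickled curves back, assuming the same 'result/' layout; the 'cnn' model name is an illustrative choice, not taken from the original run:

# Hedged sketch: reload the pickled training curves for later inspection.
import pickle

with open('result/cnn_train.p', 'rb') as f:
    train_loss, train_accuracy = pickle.load(f)
print(train_loss, train_accuracy)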
Example #2
def main():

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Using device: ", device)

    model = ConvNet().to(device)

    torch.save(model.state_dict(), FILE)
    print("Finished saving model.")
Example #3
class Training:
    def __init__(self, epoch, learningRate, batchSize, imageSize, L2Rate, trainPath):
        self.epoch = epoch
        self.learningRate = learningRate
        self.batchSize = batchSize
        self.imageSize = imageSize
        self.L2Rate = L2Rate
        self.trainPath = trainPath
        self.data_size = calculate_data_size(self.trainPath)
        self.num_batches = self.data_size // batchSize
        self.data_loader = run_loader('train', trainPath, batchSize, imageSize, shuffle=True)
        self.model = ConvNet(10)
        self.train()

    def train(self):
        self.model.train()

        crossentropy = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learningRate, weight_decay=self.L2Rate)

        for epoch in range(self.epoch):
            epoch_loss = 0
            epoch_acc = 0
            for X, y in tqdm(self.data_loader):
                optimizer.zero_grad()
                out = self.model(X)

                loss = crossentropy(out, y)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()  # .item() converts the loss tensor to a Python float
                predictions = torch.argmax(out, 1)
                epoch_acc += torch.sum(predictions == y).item()

            epoch_loss = epoch_loss / self.num_batches
            epoch_acc = epoch_acc / self.data_size
            print(f"Epoch {epoch}:", "ACC:", epoch_acc, "LOSS:", epoch_loss)

            os.makedirs("Trained", exist_ok=True)  # torch.save does not create missing directories
            torch.save(self.model.state_dict(), f"Trained/Model_{epoch}.model")
Example #4
def main():

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Using device: ", device)

    model = ConvNet().to(device)

    try:
        model.load_state_dict(torch.load(FILE))
        print("Finished loading model.")
        model.eval()
    except IOError:
        print("Failed to load model. Model might not exist.")
        return

    print("Print Network Parameters:")
    for param in model.parameters():
        print(param)

    print("Print model state dict: ", model.state_dict())

    with torch.no_grad():
        print("Perform inference/testing here...")
Example #5
                "val_loss_batch": batch_loss,
                "val_acc_batch": batch_acc
            })
            val_loop.set_postfix_str(
                f"val_loss: {round(val_loss/i, 4)} - val_acc: {round(val_acc/i, 4)}"
            )

        # Change learning rate
        lr_scheduler.step()

        # Calculate and log averages
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        train_acc /= len(train_loader)
        val_acc /= len(val_loader)
        wandb.log({
            "train_loss_epoch": train_loss,
            "train_acc_epoch": train_acc,
            "val_loss_epoch": val_loss,
            "val_acc_epoch": val_acc,
        })
        total_time = round(time.time() - start_time, 1)
        print(f"Time per epoch: {total_time}s")

        # Save model
        torch.save(
            model.state_dict(),
            os.path.join(wandb.run.dir,
                         f"model_{epoch}_{round(val_loss, 4)}.pth"),
        )
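
`lr_scheduler` is constructed outside this excerpt; a minimal sketch of one plausible setup (StepLR with assumed hyperparameters and a stand-in model, not the original configuration):

# Hedged sketch: one plausible construction of the lr_scheduler stepped above.
import torch

model = torch.nn.Linear(10, 2)  # stand-in model for illustration
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

for epoch in range(30):
    # ... run training and validation for one epoch ...
    lr_scheduler.step()  # decay the lr by gamma every step_size epochs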
Example #6
        images = im_batch['image']
        images = images.to(device)

        labels = im_batch['arrangement']
        labels = labels.reshape(-1, num_classes)
        labels = labels.float().to(device)

        outputs = model(images)

        outputs = outputs.reshape(test_batch_size, num_classes)
        # get class index from one-hot
        _, predicted = torch.max(outputs.data, 1)
        _, classes = torch.max(labels.data, 1)
        total += outputs.size(0)

        # count correct predictions across the whole batch
        count += (predicted == classes).sum().item()

    print('Test Accuracy of the model on the 1453 test images: {} %'.format(
        100 * count / total))

# Save the model checkpoint
if not os.path.exists('./saved_models/'):
    os.makedirs('./saved_models/')

ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d_%H%M%S')

torch.save(model.state_dict(), "./saved_models/model_arr_" + st + ".ckpt")
Example #7
import torch

from model import SimpleNet, ResNet, ConvNet
from mcts import mcts
from agents import netAgent, processObservation
from epoch_training import selfplay, net_update
from evaluation import evaluate

model = ConvNet(42, 7, 64)
defaultModel = ConvNet(42, 7, 64)

log = open("log.txt", 'w', buffering=1)  # line-buffered so the log stays readable during the long run
# defaultModel.load_state_dict(torch.load('parameters_simple128.pth'))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(1000):
    agent = netAgent(model, return_probs=True)
    against = netAgent(defaultModel, incorrect_moves=False)

    training_data = selfplay(agent, against, num=10)
    net_update(model, training_data, optimizer)

    agent = netAgent(model, incorrect_moves=False, best_move=False)
    against = netAgent(defaultModel, incorrect_moves=False, best_move=False)
    result = evaluate(agent, against, 1000)

    log.write("Epoch " + str(epoch) + " Result: " + str(result) + "\n")
    print("Test result: ", result)
    if result > 0.65:
        torch.save(model.state_dict(), "parameters_simple128.pth")
        defaultModel.load_state_dict(model.state_dict())
        print("switch")
        log.write("Switch\n")
Example #8
    summary(model, input_size=(3, 640, 640), device='cpu')
# model.load_state_dict(torch.load('no_gassuion_epoch35.pth'))

criterion = MultiBranchLoss(input_size=(640, 640), writer=writer, obj_scale=obj_scale, nobj_scale=nobj_scale,
                            loc_scale=loc_scale)
optimizer = Adam(model.parameters(), lr=learing_rate)

batchs_loss = 0
# build the dataset once; the DataLoader reshuffles it every epoch
dataset = WIDERFaceDetection(WIDERFace_ROOT, transform=SSDAugmentation(640, (127.5, 127.5, 127.5)))
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
for epoch in range(epochs):
    model.train()
    for i, (images, labels) in enumerate(dataloader):
        batch_num = epoch * len(dataloader) + i + 1
        optimizer.zero_grad()
        if torch.cuda.is_available():
            images = images.cuda()
        outputs = model(images)
        loss = criterion(outputs, labels, batch_num)
        batchs_loss += loss.item()
        loss.backward()
        optimizer.step()
        if batch_num % show_iter == 0:
            average_loss = batchs_loss / show_iter
            print("epoch {} batch {}:".format(epoch, i))
            print('total_loss', average_loss)
            writer.add_scalar('total_loss', average_loss, global_step=batch_num)
            batchs_loss = 0
    if epoch % 10 == 0:
        torch.save(model.state_dict(), "no_gassuion_epoch{}.pth".format(epoch))
Example #9
            # Show the training information
            if batch % 500 == 0 or batch == len(val_loader):
                acc = val_correct_cnt / val_total_cnt
                ave_loss = val_total_loss / batch
                print(
                    'Validation batch index: {}, val loss: {:.6f}, acc: {:.3f}'
                    .format(batch, ave_loss, acc))

        validation_loss.append(ave_loss)
        validation_acc.append(acc)

        model.train()

    # Save trained model
    torch.save(model.state_dict(), './checkpoint/%s.pth' % model.name())

    # Plot learning curves
    fig, axs = plt.subplots(nrows=2, ncols=2, constrained_layout=True)
    axs[0, 0].plot(train_loss)
    axs[0, 0].set_xlabel('epoch', fontsize=12)
    axs[0, 0].set_ylabel('loss', fontsize=12)
    axs[0, 0].set_title('Training Loss', fontsize=14)

    axs[0, 1].plot(validation_loss)
    axs[0, 1].set_xlabel('epoch', fontsize=12)
    axs[0, 1].set_ylabel('loss', fontsize=12)
    axs[0, 1].set_title('Validation Loss', fontsize=14)

    axs[1, 0].plot(train_acc)
    axs[1, 0].set_xlabel('epoch', fontsize=12)
    axs[1, 0].set_ylabel('accuracy', fontsize=12)
    axs[1, 0].set_title('Training Accuracy', fontsize=14)

    axs[1, 1].plot(validation_acc)
    axs[1, 1].set_xlabel('epoch', fontsize=12)
    axs[1, 1].set_ylabel('accuracy', fontsize=12)
    axs[1, 1].set_title('Validation Accuracy', fontsize=14)
Example #10
def train(pre_trained=None):

    # create folder to save models and loss graphs

    reference = hp['net_type'] + str(time.strftime("_%Y%m%d_%H%M%S"))
    checkpoints_folder = hp["output_dir"] + '/checkpoints/' + reference
    os.makedirs(checkpoints_folder, exist_ok=True)

    # save hyperparameter settings
    pickle_file_location = checkpoints_folder + "/hp.pkl"
    with open(pickle_file_location, "wb") as pickle_file:
        pickle.dump(hp, pickle_file)

    # create data iterator
    train_data_set = DataGenerator(hp)
    iterator = DataLoader(dataset=train_data_set,
                          batch_size=hp['batch_size'],
                          num_workers=hp['num_workers'],
                          pin_memory=True,
                          shuffle=False,
                          drop_last=True)

    val_set = ValidationDataGenerator(hp)
    val_set_iterator = DataLoader(dataset=val_set,
                                  batch_size=50,
                                  num_workers=hp['num_workers'],
                                  pin_memory=True,
                                  shuffle=False,
                                  drop_last=True)
    # create model and loss

    model = ConvNet().to(device)
    loss = CrossEntropyLoss().to(device)

    # optimizer
    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=hp['learning_rate'])

    start_epoch = 0
    # load pre-trained model

    if pre_trained is not None:
        ckpt = torch.load(pre_trained)
        model.load_state_dict(ckpt['net'])
        optimizer.load_state_dict(ckpt['opt'])
        start_epoch = ckpt['epoch'] + 1

    # init loss arrays
    classification_loss = np.zeros(hp['num_epochs'])
    train_accuracy = np.zeros(hp['num_epochs'])
    val_accuracy = np.zeros(hp['num_epochs'])

    # training loop
    for epoch in range(start_epoch, hp['num_epochs']):
        c_loss = 0
        acc = 0
        for i, (img, label) in enumerate(iterator):
            img = img.to(device, dtype=torch.float)
            label = label.to(device, dtype=torch.float)

            optimizer.zero_grad()
            logits = model(img)
            batch_loss = loss(logits, label.long())
            batch_loss.backward()
            optimizer.step()

            c_loss += batch_loss.item()
            # calc accuracy
            logits = logits.detach().cpu().numpy()
            label = label.detach().cpu().numpy()
            acc += utils.classification_accuracy(logits, label)
            print("epoch = {}, Training_sample={}, classification loss ={}".
                  format(epoch, i, batch_loss.item()))

        # average loss per epoch
        classification_loss[epoch] = c_loss / (i + 1)
        # average accuracy per epoch
        train_accuracy[epoch] = acc / (i + 1)

        print("epoch = {}, average classification loss ={}".format(
            epoch, classification_loss[epoch]))
        print("epoch = {}, Training accuracy ={}".format(
            epoch, train_accuracy[epoch]))

        # switch to eval mode for validation, then back to train mode afterwards
        model.eval()
        with torch.no_grad():
            val_acc = 0
            for i, (img, label) in enumerate(val_set_iterator):
                img = img.to(device, dtype=torch.float)
                label = label.to(device, dtype=torch.float)
                logits = model(img)
                # calc accuracy
                logits = logits.detach().cpu().numpy()
                label = label.detach().cpu().numpy()
                val_acc += utils.classification_accuracy(logits, label)

        model.train()
        val_accuracy[epoch] = val_acc / (i + 1)
        print("epoch = {},  Validation set accuracy ={}".format(
            epoch, val_accuracy[epoch]))

        # plot accuracy curves and save model
        plt.plot(range(1, epoch + 2),
                 train_accuracy[:epoch + 1],
                 'b-',
                 label="Train Accuracy")
        plt.plot(range(1, epoch + 2),
                 val_accuracy[:epoch + 1],
                 'r-',
                 label="Validation Accuracy")
        plt.xlabel("epochs")
        plt.ylabel("accuracy")
        plt.legend(loc='best')
        plt.savefig(checkpoints_folder + "/accuracy.jpeg", bbox_inches="tight")
        plt.clf()

        net_save = {
            'net': model.state_dict(),
            'opt': optimizer.state_dict(),
            'epoch': epoch
        }
        torch.save(
            net_save, checkpoints_folder +
            "/convnet_ethiopian_mnist_epoch{}.pth".format(epoch))
Example #11
    with open('char_dict', 'rb') as f:
        class_dict = pickle.load(f)
    num_classes = len(class_dict)

    # load the data
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
    ])
    dataset = HWDB(path=data_path, transform=transform)
    print("训练集数据:", dataset.train_size)
    print("测试集数据:", dataset.test_size)
    trainloader, testloader = dataset.get_loader(batch_size)

    net = ConvNet(num_classes)
    if torch.cuda.is_available():
        net = net.cuda()
    net.load_state_dict(torch.load('checkpoints/handwriting_iter_009.pth'))

    print('Network structure:\n')
    #summary(net, input_size=(3, 64, 64), device='cuda')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr)
    writer = SummaryWriter(log_path)
    for epoch in range(10, epochs):
        train(epoch, net, criterion, optimizer, trainloader, writer=writer)
        valid(epoch, net, testloader, writer=writer)
        print("epoch%d 结束, 正在保存模型..." % epoch)
        torch.save(net.state_dict(),
                   save_path + 'handwriting_iter_%03d.pth' % epoch)
Example #12
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
Example #13
def main(args):
    best_acc1 = 0
    os.makedirs('checkpoints', exist_ok=True)

    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device: {}'.format(args.device))

    # create model
    model = ConvNet(cfg.NUM_CLASSES).to(args.device)
    #model.apply(weights_init_normal)
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)

            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True
    # Data loading code
    # ImageFolder yields PIL images; a tensor transform (torchvision.transforms)
    # is needed so the default collate can batch them
    train_dataset = ImageFolder(cfg.TRAIN_PATH, transform=transforms.ToTensor())
    val_dataset = ImageFolder(cfg.VAL_PATH, transform=transforms.ToTensor())

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    logger = Logger('./logs')
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        adjust_learning_rate(optimizer, epoch, args)
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch, args)

        # evaluate on validation set
        val_loss, val_acc = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = val_acc > best_acc1
        best_acc1 = max(val_acc, best_acc1)

        # log
        info = {
            'train_loss': float(train_loss),
            'train_acc': float(train_acc),
            'val_loss': float(val_loss),
            'val_acc': float(val_acc)
        }
        for tag, value in info.items():
            logger.scalar_summary(tag, value, epoch)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
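
`save_checkpoint` is referenced above but not shown; a minimal sketch following the common PyTorch checkpoint recipe (the filenames are assumptions, not the original project's):

# Hedged sketch of save_checkpoint, matching the 'checkpoints' dir created in main().
import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoints/checkpoint.pth'):
    torch.save(state, filename)
    if is_best:
        # keep a separate copy of the best-performing weights
        shutil.copyfile(filename, 'checkpoints/model_best.pth')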
Example #14
class Learner:
    def __init__(self, args, q_batch):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.q_batch = q_batch
        self.learn_step_counter = 0
        self.gamma = args.gamma
        self.batch_size = args.batch_size

        self.env = gym.make(args.env)
        self.n_act = self.env.action_space.n
        self.n_state = self.env.observation_space.shape[0]
        self.n_atom = args.atom

        self.v_min = args.v_min
        self.v_max = args.v_max

        self.dz = (self.v_max - self.v_min) / (self.n_atom - 1)
        self.z = [self.v_min + i * self.dz for i in range(self.n_atom)]
        self.z_space = torch.FloatTensor(self.z).to(self.device)

        self.net = ConvNet(self.n_state, self.n_act, self.n_atom).to(self.device)
        self.target_net = ConvNet(self.n_state, self.n_act, self.n_atom).to(self.device)
        self.optimizer = optim.Adam(self.net.parameters(), lr=args.lr)

    def learn(self):
        while True:
            self.learn_step_counter += 1
            # target parameter update
            if self.learn_step_counter % 10 == 0:
                self.update_target()

            states, actions, rewards, next_states, dones = self.q_batch.get(block=True)
    
            states = torch.FloatTensor(states).to(self.device)
            actions = torch.LongTensor(actions).to(self.device)
            next_states = torch.FloatTensor(next_states).to(self.device)
            dones = [int(i) for i in dones]

            # action value distribution prediction
            # (m, N_ACTIONS, N_ATOM)
            curr_q = self.net(states)

            # pick out only the predicted distribution of the action actually taken
            curr_q = torch.stack([curr_q[i].index_select(0, actions[i]) 
                                    for i in range(self.batch_size)]).squeeze(1)

            # get next state value
            next_q = self.net(next_states).detach()  # (m, N_ACTIONS, N_ATOM)
            next_q = torch.sum(next_q * self.z_space.view(1, 1, -1), dim=2)  # (m, N_ACTIONS)
            next_action = next_q.argmax(dim=1)  # (m)

            # target_q
            target_q = self.target_net(next_states).detach().cpu().numpy()
            target_q = [target_q[i, action, :] for i, action in enumerate(next_action)]
            target_q = np.array(target_q)  # (m, N_ATOM)

            m_prob = np.zeros((self.batch_size, self.n_atom))  # (m, N_ATOM)

            # the target assignment below is not vectorized; a vectorized sketch follows this class
            for i in range(self.batch_size):
                for j in range(self.n_atom):
                    Tz = np.fmin(self.v_max,
                            np.fmax(self.v_min,
                                    rewards[i]
                                    + (1 - dones[i]) * self.gamma * (self.v_min + j * self.dz)
                                    )
                            )
  
                    bj = (Tz - self.v_min) / self.dz

                    lj = np.floor(bj).astype(int)   # m_l
                    uj = np.ceil(bj).astype(int)    # m_u

                    # split the probability mass between the two nearest atoms, weighted by distance
                    m_prob[i, lj] += (dones[i] + (1 - dones[i]) * target_q[i][j]) * (uj - bj)
                    m_prob[i, uj] += (dones[i] + (1 - dones[i]) * target_q[i][j]) * (bj - lj)

            m_prob = m_prob / m_prob.sum(axis=1, keepdims=True)

            m_prob = torch.FloatTensor(m_prob).to(self.device)
            # print(curr_q)

            # categorical cross-entropy between the projected target and the predicted distribution
            loss = -torch.mean(torch.sum(m_prob * torch.log(curr_q + 1e-20), dim=1))
            
            if self.learn_step_counter % 100 == 0:
                print('loss:', loss.item())

            # backprop loss
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

    def update_target(self):
        self.target_net.load_state_dict(self.net.state_dict())
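
The projection loop above notes that it is not vectorized; here is a hedged sketch of a vectorized equivalent. It assumes `rewards` and `dones` are float NumPy arrays of shape [m], `target_q` is [m, N_ATOM], and `z` is the atom support as a NumPy array; the names mirror the loop, but this is an illustration, not the original code.

# Hedged sketch: vectorized categorical-DQN target projection.
import numpy as np

def project_distribution(rewards, dones, target_q, z, gamma, v_min, v_max, dz):
    m, n_atom = target_q.shape
    # Bellman-updated atom positions, clipped to the support [v_min, v_max]
    Tz = np.clip(rewards[:, None] + (1.0 - dones[:, None]) * gamma * z[None, :],
                 v_min, v_max)
    bj = (Tz - v_min) / dz  # fractional atom index, [m, n_atom]
    lj = np.floor(bj).astype(int)
    uj = np.ceil(bj).astype(int)
    weight = dones[:, None] + (1.0 - dones[:, None]) * target_q
    m_prob = np.zeros((m, n_atom))
    rows = np.arange(m)[:, None]
    # split each atom's mass between its two neighbouring support points
    np.add.at(m_prob, (rows, lj), weight * (uj - bj))
    np.add.at(m_prob, (rows, uj), weight * (bj - lj))
    return m_prob / m_prob.sum(axis=1, keepdims=True)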
Example #15
class Learner:
    def __init__(self, args, q_batch):
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.q_batch = q_batch
        self.update_count = 0
        self.gamma = args.gamma
        self.batch_size = args.batch_size

        self.env_eval = gym.make(args.env)
        self.n_act = self.env_eval.action_space.n
        self.n_state = self.env_eval.observation_space.shape[0]
        self.n_quant = args.quant

        self.target_net_update_freq = args.target_net_update_freq

        self.net = ConvNet(self.n_state, self.n_act,
                           self.n_quant).to(self.device)
        self.target_net = ConvNet(self.n_state, self.n_act,
                                  self.n_quant).to(self.device)
        self.optimizer = optim.Adam(self.net.parameters(), lr=args.lr)

    def learn(self):
        while True:
            self.update_count += 1

            if self.update_count % 10 == 0:
                rewards = self.evaluation()
                rewards_mu = np.array(
                    [np.sum(np.array(l_i), 0) for l_i in rewards]).mean()
                print('update cnt %d Eval Reward %.2f' %
                      (self.update_count, rewards_mu))

            # target parameter update
            if self.update_count % self.target_net_update_freq == 0:
                self.update_target()

            states, actions, rewards, next_states, dones = self.q_batch.get(
                block=True)

            states = torch.FloatTensor(states).to(self.device)
            actions = torch.LongTensor(actions).to(self.device)
            next_states = torch.FloatTensor(next_states).to(self.device)
            dones = np.array([int(i) for i in dones])

            # action value distribution prediction
            # [BATCH, N_QUANT, N_ACTIONS]
            curr_q, tau = self.net(states)

            # pick out only the predicted quantiles of the action actually taken
            # [BATCH, N_QUANT, 1]
            curr_q = torch.stack([
                curr_q[i].index_select(1, actions[i])
                for i in range(self.batch_size)
            ])

            # [BATCH, N_QUANT, N_QUANT]
            curr_q = curr_q.repeat(1, 1, self.n_quant)

            # get next state value
            # [BATCH, N_QUANT, N_ACTIONS]
            next_q, _ = self.net(next_states)
            next_action = next_q.sum(dim=1).argmax(dim=1)

            # target_q
            with torch.no_grad():
                # [BATCH, N_QUANT, N_ACT]
                target_q, _ = self.target_net(next_states)
                target_q = target_q.detach().cpu().numpy()

                # [BATCH, N_QUANT, 1]
                target_q = np.array([
                    target_q[i, :, action]
                    for i, action in enumerate(next_action)
                ])
                target_q = rewards.reshape(
                    -1, 1) + self.gamma * target_q * (1 - dones.reshape(-1, 1))
                target_q = torch.FloatTensor(target_q).to(
                    self.device).unsqueeze(2)

                # [BATCH, N_QUANT, N_QUANT]
                target_q = target_q.repeat(1, 1, self.n_quant)
                target_q = target_q.permute(0, 2, 1)

            # loss = F.smooth_l1_loss(curr_q, target_q.detach(), reduction='none')
            # (an equivalent smooth_l1_loss formulation is sketched after this class)

            # (BATCH, N_QUANT, N_QUANT)
            tau = tau.repeat(1, 1, self.n_quant)
            diff = target_q - curr_q

            loss = self.huber(diff)

            I_delta = (diff < 0).double()
            loss *= torch.abs(tau - I_delta)

            # reduce: mean over target quantiles, sum over predicted quantiles, mean over batch
            loss = torch.mean(torch.sum(torch.mean(loss, dim=2), dim=1))

            # backprop loss
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

    def huber(self, x):
        cond = (x.abs() < 1.0).float().detach()
        return 0.5 * x.pow(2) * cond + (x.abs() - 0.5) * (1.0 - cond)

    def update_target(self):
        self.target_net.load_state_dict(self.net.state_dict())

    def evaluation(self):
        rewards = []
        for _ in range(10):
            rewards_i = []

            state = self.env_eval.reset()
            action = self.action(state)
            state, reward, done, _ = self.env_eval.step(action)
            rewards_i.append(reward)

            while not done:
                action = self.action(state)
                state, reward, done, _ = self.env_eval.step(action)
                rewards_i.append(reward)
            rewards.append(rewards_i)

        return rewards

    def action(self, state):
        state = torch.FloatTensor(state).to(self.device).unsqueeze(0)

        action_value, _ = self.net(state)
        # if self.update_count > 3000:
        #     dist_action = action_value[0].detach().cpu().numpy()
        #     sns.distplot(dist_action[:, 0], bins=10, color='red')
        #     sns.distplot(dist_action[:, 1], bins=10, color='blue')
        #     plt.show()

        action_value = action_value[0].sum(dim=0)
        action = torch.argmax(action_value).detach().cpu().item()
        return action
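
The commented-out line in `learn()` hints at an `F.smooth_l1_loss` formulation; a minimal sketch of the same quantile Huber loss written that way, using the [BATCH, N_QUANT, N_QUANT] shapes from the comments above (an illustration, not the original code):

# Hedged sketch: quantile Huber loss via F.smooth_l1_loss (default beta=1.0
# matches the huber() method above).
import torch
import torch.nn.functional as F

def quantile_huber_loss(curr_q, target_q, tau):
    # element-wise Huber term on the pairwise differences
    huber = F.smooth_l1_loss(curr_q, target_q, reduction='none')
    # asymmetric quantile weighting
    diff = target_q - curr_q
    loss = torch.abs(tau - (diff.detach() < 0).float()) * huber
    return loss.mean(dim=2).sum(dim=1).mean()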
Example #16
class Wrapper(object):
    """docstring for Wrapper."""

    def __init__(self, config, cont=None):
        super(Wrapper, self).__init__()
        with open(config, 'r') as f:
            config = json.load(f)
        self.config = config
        self.best_path = str(self.config['model']['model_save_path'] +
            self.config['name'] + '_model_best.pt')
        self.model = ConvNet(config['model'])
        self.continuing = False
        if cont is not None:
            print('loading in weights')
            self.load_model(cont)
            self.continuing = True

        self.cuda = torch.cuda.is_available()
        if self.cuda:
            print('using cuda')
            self.model.cuda()

    def train(self):
        model = self.model
        config = self.config
        trainloader = DataLoader(
            KanjiDataset(self.config, train=True),
                batch_size=config['train']['batch_size'], shuffle=True, pin_memory=True)
        # self.valloader = DataLoader(
        #     KanjiDataset(self.config, train=False),
        #         batch_size=config['train']['batch_size'], pin_memory=True)
        self.valset = KanjiDataset(self.config, train=False)
        objective = nn.CrossEntropyLoss()
        self.objective = objective
        optimizer = optim.Adam(model.parameters(), lr=config['train']['learning_rate'])

        # bestloss = float('Inf') if not self.continuing else self.valid()
        bestacc = 0.0 if not self.continuing else self.eval()[0]
        past_best = 0
        max_past = 50
        for e in range(config['train']['epochs']):
            avgloss = 0.0
            for i, (x, y) in enumerate(trainloader):
                if self.cuda:
                    x = x.cuda(non_blocking=True)  # `async` is a reserved word in Python 3.7+
                    y = y.cuda(non_blocking=True)

                optimizer.zero_grad()
                preds = model(x)
                loss = objective(preds, y)
                avgloss += loss.item()
                loss.backward()
                optimizer.step()

                preds = None
                gc.collect()
            avgloss /= len(trainloader)
            # vloss = self.valid()
            vacc = self.eval()[0]
            if e % 5 == 0:
                print('epoch: {}, loss: {:.4f}, val_acc: {:.4f}'
                      .format(e + 1, avgloss, vacc))
                # print('epoch: {}, loss: {:.4f}, val_loss: {:.4f}, memory: {:.4f}'
                #     .format(e+1, avgloss, vloss, torch.cuda.memory_allocated(0) / 1e9 ) )
            # if e%20==0:
            #     self.print_acc()
            # if vloss < bestloss:
            if vacc > bestacc:
                path = str(self.config['model']['model_save_path'] +
                    self.config['name'] + '_model_{:.4f}.pt'.format(vacc))
                self.save_model(path)
                self.save_model(self.best_path)
                # bestloss = vloss
                bestacc = vacc
                past_best = 0
            else:
                past_best += 1
            if past_best >= max_past:
                print('past')
                break

        self.valloader = None
        self.print_acc()
        return

    def valid(self):
        loss = 0.0
        for (x, y) in self.valloader:
            if self.cuda:
                x = x.cuda(non_blocking=True)
                y = y.cuda(non_blocking=True)
            loss += self.objective(self.model(x), y).item()
        return loss/len(self.valloader)

    def eval(self, train=False):
        validset = self.valset if train else KanjiDataset(self.config, train=False)
        acc = 0
        conf = np.zeros((self.config['model']['classes'],
            self.config['model']['classes']), dtype=np.int32)
        for (x, y) in validset:
            pred = self.predict(x)
            acc += (pred == y)
            conf[y, pred] += 1
        return acc/len(validset), conf

    def print_acc(self):
        acc, conf = self.eval()
        print('acc:', acc)
        print('conf:\n', conf)

    def predict(self, image):
        image = torch.unsqueeze(image, 0)
        if self.cuda:
            image = image.cuda(non_blocking=True)
        pred = self.model(image)
        pred = torch.argmax(pred[0])
        return pred.item()

    def save_model(self, path):
        torch.save(self.model.state_dict(), path)
        print('save:', path)

    def load_model(self, cont):
        path = self.best_path
        if cont != 'cont':
            path = join(self.config['model']['model_save_path'], cont)
        print('loading path:', path)
        self.model.load_state_dict(torch.load(path))
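
The Wrapper reads its settings from a JSON file; a minimal sketch of the layout it appears to expect, with keys inferred from the accesses above (every value is an illustrative assumption):

# Hedged sketch of a config file for Wrapper; keys inferred from the code,
# values are illustrative assumptions only.
import json

config = {
    "name": "kanji_convnet",
    "model": {"model_save_path": "models/", "classes": 100},
    "train": {"batch_size": 64, "learning_rate": 1e-3, "epochs": 500},
}
with open("config.json", "w") as f:
    json.dump(config, f, indent=2)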