Пример #1
0
    def forward(self,
                input_ids=None,
                token_type_ids=None,
                attention_mask=None,
                labels=None):
        """Run BERT + multi-sample-dropout classification.

        Averages classifier logits (and, when ``labels`` is given, the loss)
        over ``self.multi_drop`` independent dropout samples of the pooled
        BERT output.

        Args:
            input_ids: token id tensor fed to ``self.bert``.
            token_type_ids: segment ids for BERT.
            attention_mask: attention mask for BERT.
            labels: optional targets; when ``None`` the loss stays 0.

        Returns:
            Tuple ``(out, loss)``: averaged logits (sigmoid-squashed and
            flattened for the 'binary' loss method) and the averaged loss.
        """
        outputs = self.bert(input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        pooled_output = outputs[1]  # pooled [CLS] representation
        out = None
        loss = 0
        for i in range(self.multi_drop):
            output = self.dropout(pooled_output)
            logits = self.classifier(output)
            # BUGFIX: the original ran the classifier only when labels were
            # provided, so inference (labels=None) crashed on the division
            # of `out` (still None) below.
            out = logits if out is None else out + logits
            if labels is not None:
                loss = loss + compute_loss(logits,
                                           labels,
                                           loss_method=self.loss_method)

        loss = loss / self.multi_drop
        out = out / self.multi_drop

        if self.loss_method in ['binary']:
            out = torch.sigmoid(out).flatten()

        return out, loss
Пример #2
0
    def build_model(self,):
        """Build the TF1 graph: placeholders, generator/discriminator, losses.

        Side effects: loads GCN data and defines self.x / self.z / self.z_t /
        self.lap / self.fea placeholders, the generator output self.x_,
        discriminator scores self.D / self.D_, the G/D loss tensors, and
        self.accuracy.
        """
        utils.prepare_data(data_file=self.data_file)
        # Graph Laplacians (one per support) and the node feature matrix.
        self.lap_list, self.feature = utils.load_gcn_data(self.graph_file, self.num_support)
        self.num_feature = self.feature.shape[1]
        # Real sequences, generator noise input, and noise targets.
        self.x = tf.placeholder(tf.float32, [None, self.d_input_step, self.d_input_size])
        self.z = tf.placeholder(tf.float32, [None, self.g_input_step, self.g_input_size])
        self.z_t = tf.placeholder(tf.float32, [None, self.g_input_step, self.g_input_size])
        self.lap = tf.placeholder(tf.float32, [self.num_support, self.d_input_size, self.d_input_size])
        self.fea = tf.placeholder(tf.float32, [self.d_input_size, self.num_feature])

        self.x_ = self.generator(self.z, self.g_input_step, self.g_input_size, self.g_hidden_size, self.g_batch_size)
        self.D = self.discriminator(self.x, self.d_input_step, self.d_input_size, self.d_hidden_size, 1, self.g_batch_size)
        # Second discriminator call shares weights via reuse=True.
        self.D_ = self.discriminator(self.x_, self.d_input_step, self.d_input_size, self.d_hidden_size, 1, self.g_batch_size, reuse=True)

        if self.wgan == 1:
            # WGAN-style critic losses: raw score means, no cross-entropy.
            self.d_loss_real = tf.reduce_mean(self.D)
            self.d_loss_fake = tf.reduce_mean(self.D_)
            self.g_loss = self.d_loss_fake
            self.d_loss = self.d_loss_real - self.d_loss_fake

        else:
            # Standard GAN losses via the shared compute_loss helper.
            self.d_loss_real = utils.compute_loss(self.D, tf.ones_like(self.D))
            self.d_loss_fake = utils.compute_loss(self.D_, tf.zeros_like(self.D_))
            self.g_loss = utils.compute_loss(self.D_, tf.ones_like(self.D_))
            self.d_loss = self.d_loss_real + self.d_loss_fake

        # NOTE(review): self.z_ is never defined in this method — presumably
        # created elsewhere in the class; verify before relying on accuracy.
        self.accuracy = utils.compute_accuracy(self.z_t, self.z_)
Пример #3
0
def test(epoch, model, test_loader, writer, sigma_0, lr_sigma, iters_sig):
    """Evaluate `model` on clean and noise-corrupted test data; log results.

    For each batch, `get_sigma` optimizes per-example noise levels (written
    back into `sigma_0` via `idx`) and returns a corrupted copy of the
    batch. Loss and accuracy are accumulated for both clean and corrupted
    inputs; a sample figure and scalar summaries go to the TensorBoard
    `writer`.

    Args:
        epoch: current epoch index, used as the logging step.
        model: classifier producing (presumably softmax) outputs.
        test_loader: yields (batch, targets, idx) triples.
        writer: TensorBoard SummaryWriter.
        sigma_0: per-example sigma tensor, indexed by `idx`; updated in place.
        lr_sigma: learning rate used inside `get_sigma`.
        iters_sig: number of sigma-optimization iterations.

    Returns:
        Tuple (corrupted test accuracy in percent, updated sigma_0).
    """
    model = model.eval()
    test_loss = 0
    test_loss_corrupted = 0
    total = 0
    correct = 0
    correct_corrupted = 0
    for _, (batch, targets, idx) in enumerate(test_loader):
        batch = batch.to(device)
        targets = targets.to(device)

        # Optimize sigma for this batch; also returns the corrupted inputs.
        sigma, batch_corrupted = get_sigma(model, batch, lr_sigma,
                                           sigma_0[idx], iters_sig, device)
        sigma_0[idx] = sigma  # update sigma
        with torch.no_grad():

            # forward pass through the base classifier
            outputs_softmax = model(batch)
            outputs_corrputed_softmax = model(batch_corrupted)

        loss = compute_loss(outputs_softmax, targets)
        loss_corrupted = compute_loss(outputs_corrputed_softmax, targets)

        # Accumulate sums weighted by batch size; normalized by `total` below.
        test_loss += loss.item() * len(batch)
        test_loss_corrupted += loss_corrupted.item() * len(batch)

        _, predicted = outputs_softmax.max(1)
        _, predicted_corrupted = outputs_corrputed_softmax.max(1)

        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        correct_corrupted += predicted_corrupted.eq(targets).sum().item()

    print(
        '===> Test Loss: {}. Test Accuracy: {}. Test Loss Corrupted: {}. Test Accuracy Corrupted: {}'
        .format(test_loss / total, 100. * correct / total,
                test_loss_corrupted / total, 100. * correct_corrupted / total))
    # Visualize up to 8 clean/corrupted pairs from the last batch.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)

    writer.add_figure('sample of noisy test examples', fig, epoch)
    writer.add_scalar('loss/test_loss', test_loss / total, epoch)
    writer.add_scalar('accuracy/test_accuracy', 100. * correct / total, epoch)
    writer.add_scalar('loss/test_loss_corrupted', test_loss_corrupted / total,
                      epoch)
    writer.add_scalar('accuracy/test_accuracy_corrupted',
                      100. * correct_corrupted / total, epoch)
    writer.add_scalar('sigma/test_sigma_mean', sigma_0.mean().item(), epoch)
    writer.add_scalar('sigma/test_sigma_min', sigma_0.min().item(), epoch)
    writer.add_scalar('sigma/test_sigma_max', sigma_0.max().item(), epoch)

    return 100. * correct_corrupted / total, sigma_0
Пример #4
0
def validate_ori(args, model, criterion, test_data):
    """Compute the running-mean validation loss of `model` over `test_data`.

    Like `validate`, but targets are a single tensor (moved to CUDA
    directly) rather than a dict of tensors.

    Args:
        args: namespace with batch_size, num_workers and cuda attributes.
        model: network passed through to `compute_loss`.
        criterion: loss criterion passed through to `compute_loss`.
        test_data: dataset to evaluate.

    Returns:
        The incremental mean of the per-batch `avg_loss` values.
    """
    # PREPARE DATA
    dataloader = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers)
    # VALIDATE
    model.eval()
    total_loss = 0.
    with tqdm(total=len(test_data) //
              args.batch_size) as pbar, torch.no_grad():
        for example_num, (x, targets) in enumerate(dataloader):
            if args.cuda:
                x = x.cuda()
                targets = targets.cuda()

            outputs, avg_loss = compute_loss(model, x, targets, criterion)
            # Incremental mean of the first (example_num + 1) batch losses.
            total_loss += (1. / float(example_num + 1)) * (avg_loss -
                                                           total_loss)
            # BUGFIX: the progress bar was created but never advanced
            # (compare the sibling `validate`, which updates it per batch).
            pbar.set_description("Current loss: " + str(total_loss))
            pbar.update(1)

    return total_loss
Пример #5
0
def train(args, n_actors, batch_queue, prios_queue, param_queue):
    """Ape-X learner loop: consume batches, update the DQN, publish params.

    Runs forever. Pulls prioritized batches from `batch_queue`, pushes new
    priorities to `prios_queue`, and periodically publishes model weights
    on `param_queue`, syncs the target network (soft or hard), saves the
    model to disk, and writes averaged TensorBoard summaries.
    """
    env = wrapper.make_atari(args.env)
    env = wrapper.wrap_atari_dqn(env, args)
    utils.set_global_seeds(args.seed, use_torch=True)

    model = DuelingDQN(env, args).to(args.device)
    # model.load_state_dict(torch.load('model_30h.pth'))
    tgt_model = DuelingDQN(env, args).to(args.device)
    tgt_model.load_state_dict(model.state_dict())

    writer = SummaryWriter(comment="-{}-learner".format(args.env))
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    # optimizer = torch.optim.RMSprop(model.parameters(), args.lr, alpha=0.95, eps=1.5e-7, centered=True)

    # Block until all actor processes are reachable.
    check_connection(n_actors)

    param_queue.put(model.state_dict())
    learn_idx = 0
    ts = time.time()
    # Rolling buffers of per-step stats, flushed every args.tb_interval steps.
    tb_dict = {
        k: []
        for k in ['loss', 'grad_norm', 'max_q', 'mean_q', 'min_q']
    }
    while True:
        *batch, idxes = batch_queue.get()
        loss, prios, q_values = utils.compute_loss(model, tgt_model, batch,
                                                   args.n_steps, args.gamma)
        grad_norm = utils.update_parameters(loss, model, optimizer,
                                            args.max_norm)
        prios_queue.put((idxes, prios))
        # Drop references so the batch tensors can be freed promptly.
        batch, idxes, prios = None, None, None
        learn_idx += 1

        tb_dict["loss"].append(float(loss))
        tb_dict["grad_norm"].append(float(grad_norm))
        tb_dict["max_q"].append(float(torch.max(q_values)))
        tb_dict["mean_q"].append(float(torch.mean(q_values)))
        tb_dict["min_q"].append(float(torch.min(q_values)))

        if args.soft_target_update:
            # Polyak averaging of target parameters.
            tau = args.tau
            for p_tgt, p in zip(tgt_model.parameters(), model.parameters()):
                p_tgt.data *= 1 - tau
                p_tgt.data += tau * p
        elif learn_idx % args.target_update_interval == 0:
            print("Updating Target Network..")
            tgt_model.load_state_dict(model.state_dict())
        if learn_idx % args.save_interval == 0:
            print("Saving Model..")
            torch.save(model.state_dict(), "model.pth")
        if learn_idx % args.publish_param_interval == 0:
            param_queue.put(model.state_dict())
        if learn_idx % args.tb_interval == 0:
            bps = args.tb_interval / (time.time() - ts)
            print("Step: {:8} / BPS: {:.2f}".format(learn_idx, bps))
            writer.add_scalar("learner/BPS", bps, learn_idx)
            for k, v in tb_dict.items():
                writer.add_scalar(f'learner/{k}', np.mean(v), learn_idx)
                v.clear()
            ts = time.time()
Пример #6
0
    def forward(self,
                inputs=None,
                attention_mask=None,
                output_id=None,
                labels=None):
        """RoBERTa forward pass combining an MLM loss with a label loss.

        Args:
            inputs: dense features, projected by `self.text_linear` and fed
                to RoBERTa as `inputs_embeds`.
            attention_mask: attention mask for RoBERTa.
            output_id: MLM target ids; positions equal to -100 are ignored.
            labels: classification targets; -1 marks unlabeled examples.

        Returns:
            Tuple ``(loss, out)``: the combined mlm + label loss and the
            flattened sigmoid scores of labeled examples. When no example
            is labeled, returns ``(mlm_loss, empty tensor)``.
        """
        inputs = torch.relu(self.text_linear(inputs))
        bert_outputs = self.roberta(inputs_embeds=inputs,
                                    attention_mask=attention_mask)

        # calculate mlm loss
        last_hidden_state = bert_outputs[0]
        output_id_tmp = output_id[output_id.ne(-100)]
        output_id_emb = last_hidden_state[output_id.ne(-100)]
        pre_score = self.vocab_layer(output_id_emb)
        loss_cro = CrossEntropyLoss()
        # NOTE(review): CrossEntropyLoss expects raw logits; the sigmoid
        # squashing here is unusual — confirm it is intentional.
        mlm_loss = loss_cro(torch.sigmoid(pre_score), output_id_tmp)

        labels_bool = labels.ne(-1)
        if labels_bool.sum().item() == 0:
            return mlm_loss, torch.tensor([])

        # calculate label loss on the labeled subset only
        pooled_output = bert_outputs[1]
        out = self.classifier(pooled_output)
        out = out[labels_bool]
        labels_tmp = labels[labels_bool]
        label_loss = compute_loss(out, labels_tmp)
        out = torch.sigmoid(out).flatten()
        # BUGFIX: removed an unreachable `return out, loss` that followed
        # this return and referenced an undefined `loss` variable.
        return mlm_loss + label_loss, out
Пример #7
0
def _main():
    """Restore a TrajectoryLearner checkpoint and report test-set losses."""
    learner = TrajectoryLearner()
    learner.setup_inference(FLAGS, mode='loss')

    saver = tf.train.Saver([var for var in tf.trainable_variables()])
    # NOTE(review): `init` is created but never run — verify intentional.
    init = tf.initialize_all_variables()

    test_generator = DirectoryIterator(FLAGS.test_dir,
                                       shuffle=False,
                                       target_size=(FLAGS.img_width,
                                                    FLAGS.img_height),
                                       batch_size=FLAGS.batch_size)

    # Number of batches needed to cover the whole test set.
    steps = int(math.ceil(test_generator.samples / FLAGS.batch_size))

    with tf.Session() as sess:
        saver.restore(sess, FLAGS.ckpt_file)
        print("--------------------------------------------------")
        print("Restored checkpoint file {}".format(FLAGS.ckpt_file))
        print("--------------------------------------------------")
        outs = compute_loss(sess, learner, test_generator, steps, verbose=1)

    # Logging
    print("Average Vel Std: {:.3f}".format(outs['vel_std']))
    print("Average Point Std: {:.3f}".format(outs['pnt_std']))
    print("Average Vel MSE: {:.3f}".format(outs['vel_mse']))
    print("Average Point MSE: {:.3f}".format(outs['pnt_mse']))
def _val(epoch):
    """Validate the retrieval encoders: loss, median rank and recalls.

    Relies on module-level TxtEnc / ImgEnc / val_loader / val_set / writer /
    scheduler / args / device. Encodes every validation batch, accumulates
    the loss, then computes retrieval ranks over the full set and logs
    everything to TensorBoard. Steps the (plateau-style) scheduler on the
    epoch loss.
    """
    print('=> val')
    TxtEnc.eval()
    ImgEnc.eval()
    loss_epoch = 0.0
    imgs = []
    rcps = []
    for batch in tqdm(val_loader):
        recipe = batch
        recipe[0], recipe[1] = recipe[0].to(device), recipe[1].to(device)
        with torch.no_grad():
            txts_sub = TxtEnc(recipe[0])
            imgs_sub = ImgEnc(recipe[1])
            loss = compute_loss(txts_sub, imgs_sub, device)
            # Weight batch loss by batch size; normalized by len(val_set) below.
            loss_epoch += loss.item() * recipe[1].shape[0]
        rcps.append(txts_sub.detach().cpu().numpy())
        imgs.append(imgs_sub.detach().cpu().numpy())
    rcps = np.concatenate(rcps, axis=0)
    imgs = np.concatenate(imgs, axis=0)
    print('=> computing ranks...')
    medR, medR_std, recalls = rank(rcps, imgs, args.retrieved_type,
                                   args.retrieved_range)
    print('=> val MedR: {:.4f}({:.4f})'.format(medR, medR_std))
    writer.add_scalar('medR', medR, epoch)
    writer.add_scalar('medR_std', medR_std, epoch)
    for k, v in recalls.items():
        writer.add_scalar('Recall@{}'.format(k), v, epoch)
    loss_epoch /= len(val_set)
    writer.add_scalar('loss_epoch_val', loss_epoch, epoch)
    scheduler.step(loss_epoch)
Пример #9
0
def validate(args, model, criterion, test_data):
    """Evaluate `model` on `test_data` and return the mean batch loss."""
    # PREPARE DATA
    loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    # VALIDATE
    model.eval()
    total_loss = 0.
    num_batches = len(test_data) // args.batch_size
    with tqdm(total=num_batches) as pbar, torch.no_grad():
        for batch_idx, (x, targets) in enumerate(loader):
            if args.cuda:
                x = x.cuda()
                # Move every target tensor in the dict onto the GPU.
                for key in list(targets.keys()):
                    targets[key] = targets[key].cuda()

            _, avg_loss = compute_loss(model, x, targets, criterion)

            # Incremental mean over the batches seen so far.
            total_loss += (1. / float(batch_idx + 1)) * (avg_loss - total_loss)

            pbar.set_description("Current loss: " + str(total_loss))
            pbar.update(1)

    return total_loss
Пример #10
0
def evaluate_model(model, dataset, train, test, hyperparams):
    """Train (or restore) `model` on `train`, then score it on `test`.

    Returns:
        (score, train_time, evaluation_time) — score is the negated metric
        loss; train_time is -1 when cached CIFAR weights were restored
        instead of fitting.
    """
    is_cifar_model = model.__name__ == "Cifar10CustomModel"
    cifar_model_weights_path = joinpath(RESULTS_DIR,
                                        "Cifar10CustomModel-weights.pkl")

    start_time = time.time()
    train_data, test_data = model.prepare_dataset(train, test,
                                                  dataset.categorical_features)
    estimator = model.build_estimator(hyperparams, train_data)

    # Restore Cifar10CustomModel if weights have been saved
    if is_cifar_model and isfile(cifar_model_weights_path):
        estimator.initialize()
        estimator.load_params(f_params=cifar_model_weights_path)
        train_time = -1
    else:
        features, targets, *_ = train_data
        estimator.fit(features, targets)
        train_time = time.time() - start_time
        if is_cifar_model:
            estimator.save_params(f_params=cifar_model_weights_path)

    start_time = time.time()
    X_test, y_test = test_data
    predictions = estimator.predict(X_test)
    metric_value = compute_metric(y_test, predictions, dataset.metric)
    score = -compute_loss(dataset.metric, [metric_value])
    evaluation_time = time.time() - start_time

    return score, train_time, evaluation_time
Пример #11
0
    def compute_td_loss(self, batch_size, beta):
        """Sample a prioritized batch, compute the TD loss and update.

        Args:
            batch_size: number of transitions to sample from the buffer.
            beta: importance-sampling exponent for prioritized replay.

        Returns:
            The scalar TD loss tensor.
        """
        state, action, reward, next_state, done, weights, indices = \
            self.replay_buffer.sample(batch_size, beta)

        state      = torch.FloatTensor(state).to(self.device)
        next_state = torch.FloatTensor(next_state).to(self.device)
        action     = torch.LongTensor(action).to(self.device)
        reward     = torch.FloatTensor(reward).to(self.device)
        done       = torch.FloatTensor(done).to(self.device)
        weights    = torch.FloatTensor(weights).to(self.device)
        batch = (state, action, reward, next_state, done, weights)

        # Loss and new priorities are delegated to the shared helper.
        loss, prios = utils.compute_loss(self.model, self.target_model, batch, 1)

        self.optimizer.zero_grad()
        loss.backward()
        self.replay_buffer.update_priorities(indices, prios)
        self.optimizer.step()
        # BUGFIX: scheduler.step() previously ran between backward() and
        # optimizer.step(), which skews the LR schedule (and triggers a
        # warning in modern PyTorch); it now follows the optimizer step.
        self.scheduler.step()
        return loss
Пример #12
0
def run_HetGNN(model, hg, het_graph, config):
    """Train HetGNN with skip-gram link sampling; evaluate every epoch.

    `het_graph` is used to sample neighbours; `hg` supplies masks, labels
    and features for the node category being classified. After each epoch,
    author link prediction and node classification (micro/macro F1) are
    reported on the full graph.
    """
    # het_graph is used to sample neighbour
    hg = hg.to('cpu')
    category = config.category
    train_mask = hg.nodes[category].data.pop('train_mask')
    test_mask = hg.nodes[category].data.pop('test_mask')
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    labels = hg.nodes[category].data.pop('label')
    # NOTE(review): `emd` is loaded but never used in this function.
    emd = hg.nodes[category].data['dw_embedding']
    train_batch = load_link_pred('./a_a_list_train.txt')
    test_batch = load_link_pred('./a_a_list_test.txt')
    # HetGNN Sampler
    batch_sampler = SkipGramBatchSampler(hg, config.batch_size,
                                         config.window_size)
    neighbor_sampler = NeighborSampler(het_graph, hg.ntypes,
                                       batch_sampler.num_nodes, config.device)
    collator = HetGNNCollator(neighbor_sampler, hg)
    dataloader = DataLoader(batch_sampler,
                            collate_fn=collator.collate_train,
                            num_workers=config.num_workers)

    opt = th.optim.Adam(model.parameters())

    pred = ScorePredictor()
    dataloader_it = iter(dataloader)
    for i in range(config.max_epoch):
        model.train()
        for batch_id in tqdm.trange(config.batches_per_epoch):
            positive_graph, negative_graph, blocks = next(dataloader_it)
            blocks = [b.to(config.device) for b in blocks]
            positive_graph = positive_graph.to(config.device)
            negative_graph = negative_graph.to(config.device)
            # we need extract multi-feature
            input_features = extract_feature(blocks[0], hg.ntypes)

            x = model(blocks[0], input_features)
            # Contrast positive-edge scores against negative-edge scores.
            loss = compute_loss(pred(positive_graph, x),
                                pred(negative_graph, x))

            opt.zero_grad()
            loss.backward()
            opt.step()
        print('Epoch {:05d} |Train - Loss: {:.4f}'.format(i, loss.item()))
        # Full-graph inference for the per-epoch evaluations below.
        input_features = extract_feature(het_graph, hg.ntypes)
        x = model(het_graph, input_features)
        author_link_prediction(x['author'].to('cpu').detach(), train_batch,
                               test_batch)
        micro_f1, macro_f1 = Hetgnn_evaluate(
            x[config.category].to('cpu').detach(), labels, train_idx, test_idx)
        print('<Classification>     Micro-F1 = %.4f, Macro-F1 = %.4f' %
              (micro_f1, macro_f1))
    pass
Пример #13
0
def train(args, n_actors, batch_queue, prios_queue, param_queue):
    env = RunTagEnv(width=5,
                    height=5,
                    number_of_subordinates=1,
                    max_steps=1000)
    #env = wrapper.make_atari(args.env)
    #env = wrapper.wrap_atari_dqn(env, args)
    utils.set_global_seeds(args.seed, use_torch=True)

    model = DuelingDQN(env).to(args.device)
    tgt_model = DuelingDQN(env).to(args.device)
    tgt_model.load_state_dict(model.state_dict())

    writer = SummaryWriter(comment="-{}-learner".format(args.env))
    # optimizer = torch.optim.Adam(model.parameters(), args.lr)
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    args.lr,
                                    alpha=0.95,
                                    eps=1.5e-7,
                                    centered=True)

    check_connection(n_actors)

    param_queue.put(model.state_dict())
    learn_idx = 0
    ts = time.time()
    while True:
        *batch, idxes = batch_queue.get()
        loss, prios = utils.compute_loss(model, tgt_model, batch, args.n_steps,
                                         args.gamma)
        grad_norm = utils.update_parameters(loss, model, optimizer,
                                            args.max_norm)
        print('Updated parameters!')
        prios_queue.put((idxes, prios))
        batch, idxes, prios = None, None, None
        learn_idx += 1

        writer.add_scalar("learner/loss", loss, learn_idx)
        writer.add_scalar("learner/grad_norm", grad_norm, learn_idx)

        if learn_idx % args.target_update_interval == 0:
            print("Updating Target Network..")
            tgt_model.load_state_dict(model.state_dict())
        if learn_idx % args.save_interval == 0:
            print("Saving Model..")
            torch.save(model.state_dict(), "model.pth")
        if learn_idx % args.publish_param_interval == 0:
            param_queue.put(model.state_dict())
        if learn_idx % args.bps_interval == 0:
            bps = args.bps_interval / (time.time() - ts)
            print("Step: {:8} / BPS: {:.2f}".format(learn_idx, bps))
            writer.add_scalar("learner/BPS", bps, learn_idx)
            ts = time.time()
Пример #14
0
def train_step(x, y, model, optimizer):
    """One optimization step: forward pass, loss, gradients, update.

    Returns the loss tensor for this minibatch.
    """
    # Record operations for automatic differentiation.
    with tf.GradientTape() as tape:
        predictions = model(x)
        loss = compute_loss(y, predictions)

    # Differentiate the loss w.r.t. every trainable variable,
    # then let the optimizer apply the update.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss
Пример #15
0
    def test(self):
        """Evaluate the current training weights on one generated batch.

        Returns (pointer, loss, acc, label) for the evaluation network.
        """
        # Mirror the latest training weights into the evaluation network.
        self.test_net.load_state_dict(self.train_net.state_dict())

        batch = self.gen_batch()
        seq = Variable(Tensor(batch[0]))
        seq_mask = Variable(torch.LongTensor(batch[1].astype('int64')))
        target = Variable(Tensor(batch[2]))
        label = Variable(torch.LongTensor(batch[3].astype('int64')))

        pointer = self.test_net(seq, seq_mask, target)
        loss = utils.compute_loss(pointer, label, target)
        acc = utils.compute_acc(pointer, label)
        return pointer, loss, acc, label
Пример #16
0
def validation(model, criterion, evaluation_loader, converter, opt):
    """Run validation/evaluation of a text-recognition model.

    Decodes greedy predictions for every batch, accumulates the criterion
    loss, and scores word- and character-level accuracy via compute_loss.

    Returns:
        (avg loss, word accuracy %, normalized char accuracy %, the last
        batch's predicted strings, the last batch's labels, total inference
        time, number of evaluated samples)
    """
    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()

    for i, (image_tensors, labels) in enumerate(evaluation_loader):
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        image = image_tensors.to(device)
        # For max length prediction
        length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                          batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                         1).fill_(0).to(device)

        text_for_loss, length_for_loss = converter.encode(
            labels, batch_max_length=opt.batch_max_length)

        start_time = time.time()

        preds, global_feature, local_feature, attention_weights, transformed_imgs, control_points = model(
            image, text_for_pred, is_train=False)

        forward_time = time.time() - start_time

        # Align prediction length with the target (which includes [GO]).
        preds = preds[:, :text_for_loss.shape[1] - 1, :]
        target = text_for_loss[:, 1:]  # without [GO] Symbol
        cost = criterion(preds.contiguous().view(-1, preds.shape[-1]),
                         target.contiguous().view(-1))

        # select max probabilty (greedy decoding) then decode index to character
        preds_score, preds_index = preds.max(2)
        preds_str = converter.decode(preds_index, length_for_pred)
        labels = converter.decode(text_for_loss[:, 1:], length_for_loss)

        infer_time += forward_time
        valid_loss_avg.add(cost)

        # calculate accuracy.
        batch_n_correct, batch_char_acc = compute_loss(preds_str, labels, opt)
        n_correct += batch_n_correct
        norm_ED += batch_char_acc

    accuracy = n_correct / float(length_of_data) * 100
    norm_ED = norm_ED / float(length_of_data) * 100

    return valid_loss_avg.val(
    ), accuracy, norm_ED, preds_str, labels, infer_time, length_of_data
Пример #17
0
    def train(self):
        """Run one optimization step on a freshly generated batch.

        Returns (pointer, loss, acc, label) for the training network.
        """
        batch = self.gen_batch()
        seq = Variable(Tensor(batch[0]))
        seq_mask = Variable(torch.LongTensor(batch[1].astype('int64')))
        target = Variable(Tensor(batch[2]))
        label = Variable(torch.LongTensor(batch[3].astype('int64')))

        pointer = self.train_net(seq, seq_mask, target)
        loss = utils.compute_loss(pointer, label, target)
        acc = utils.compute_acc(pointer, label)

        # Standard backprop + parameter update.
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return pointer, loss, acc, label
Пример #18
0
    def train(self):
        """Single-process learner loop: sample, learn, sync, checkpoint.

        Samples prioritized batches from the replay buffer (annealing the
        importance-sampling beta by step), computes the loss, updates
        parameters and priorities, and periodically syncs the target
        network, saves checkpoints, and publishes weights to the batch
        recorder's workers. Stops after self.max_step updates.
        """
        utils.set_global_seeds(self.seed, use_torch=True)

        learn_idx = 0
        while True:
            beta = self.beta_by_frame(learn_idx)
            states, actions, rewards, next_states, dones, weights, idxes = self.buffer.sample(
                self.batch_size, beta)
            states = torch.FloatTensor(states).to(self.device)
            actions = torch.LongTensor(actions).to(self.device)
            rewards = torch.FloatTensor(rewards).to(self.device)
            next_states = torch.FloatTensor(next_states).to(self.device)
            dones = torch.FloatTensor(dones).to(self.device)
            weights = torch.FloatTensor(weights).to(self.device)
            batch = (states, actions, rewards, next_states, dones, weights)

            loss, prios = utils.compute_loss(self.model, self.tgt_model, batch,
                                             self.n_step, self.gamma)

            # NOTE(review): scheduler.step() runs before the optimizer step
            # performed inside update_parameters — confirm this ordering is
            # intended (PyTorch convention is scheduler after optimizer).
            self.scheduler.step()
            grad_norm = utils.update_parameters(loss, self.model,
                                                self.optimizer, self.max_norm)

            self.buffer.update_priorities(idxes, prios)

            # Drop references so the batch tensors can be freed promptly.
            batch, idxes, prios = None, None, None
            learn_idx += 1

            self.writer.add_scalar("learner/loss", loss, learn_idx)
            self.writer.add_scalar("learner/grad_norm", grad_norm, learn_idx)

            if learn_idx % self.target_update_interval == 0:
                print("Updating Target Network..")
                self.tgt_model.load_state_dict(self.model.state_dict())
            if learn_idx % self.save_interval == 0:
                print("Saving Model..")
                torch.save(self.model.state_dict(),
                           "model{}.pth".format(learn_idx))
            if learn_idx % self.publish_param_interval == 0:
                self.batch_recorder.set_worker_weights(
                    copy.deepcopy(self.model))
            if learn_idx >= self.max_step:
                torch.save(self.model.state_dict(),
                           "model{}.pth".format(learn_idx))
                self.batch_recorder.cleanup()
                break
Пример #19
0
def find_close_point(X, Y, center, TARGET=2, k=50):
    """Find the indices of the top-k points closest to a given center.

    Only points whose label equals ``TARGET`` are considered; all other
    points are assigned an infinite distance so they sort last.

    Args:
        X: 2-D dataset, one point per row.
        Y: 1-D label array aligned with ``X``.
        center: the center of the target cluster.
        TARGET: label value of the cluster to search within.
        k: number of closest points to return.

    Returns:
        Indices (into ``Y``) of the k points closest to ``center``.
    """
    # Start everything at +inf; only TARGET-labeled points get a real loss.
    loss_arr = np.full(len(Y), np.inf, dtype=float)
    for idx, label in enumerate(Y):
        if label == TARGET:
            loss_arr[idx] = compute_loss(X[idx], center)
    return loss_arr.argsort()[:k]
Пример #20
0
def train_epoch(model, epoch, train_data_loader, optimizer):
    """Train `model` for one epoch with a step-wise learning-rate schedule.

    The LR is 1e-2 before epoch 30, 1e-3 for epochs 30-59, and 1e-4 from
    epoch 60 on. Prints the epoch's summed loss at the end.

    Args:
        model: network returning (sempred, inspred, insreg) for a batch.
        epoch: current epoch number (drives the LR schedule).
        train_data_loader: yields (data, target-dict) batches.
        optimizer: optimizer whose param groups receive the scheduled LR.
    """
    model.train()
    for param in model.parameters():  # Setting complete model to be trainable
        param.requires_grad = True

    # Step-wise schedule.
    if epoch < 30:
        learning_rate = 1e-2
    elif epoch < 60:
        learning_rate = 1e-3
    else:
        learning_rate = 1e-4
    # BUGFIX: the epoch >= 30 branches previously updated
    # `optimizer_backbone`, which is undefined in this scope (NameError);
    # all branches now update the `optimizer` argument.
    for param_group in optimizer.param_groups:
        param_group['lr'] = learning_rate

    print("epoch number --> " + str(epoch) + " learning rate ---> " + str(learning_rate))

    train_loss = 0
    for batch_idx, (data, target) in enumerate(train_data_loader):

        data = data.to(dev)
        for key in target.keys():
            target[key] = target[key].to(dev)

        optimizer.zero_grad()

        sempred, inspred, insreg = model(data)

        loss = compute_loss(sempred, inspred, insreg, target)

        loss.backward()

        train_loss += loss.item()

        optimizer.step()

    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss))
Пример #21
0
    def objective(args):
        """Hyperopt-style objective: k-fold CV loss for one hyper-param set.

        Closure variables `model`, `train`, `kfold` and `dataset` come from
        the enclosing scope. Returns the metric loss, or a hyperopt failure
        dict when training raises ValueError.
        """
        try:
            estimator = model.build_estimator(args, train)
            metric_values = []
            X, y, *_ = train
            for train_index, val_index in kfold.split(*train):
                X_train, X_val = X[train_index], X[val_index]
                y_train, y_val = y[train_index], y[val_index]

                estimator.fit(X_train, y_train)
                metric_value = compute_metric(y_val, estimator.predict(X_val), dataset.metric)
                metric_values.append(metric_value)
                # Some datasets only need a single fold.
                if not getattr(dataset, 'needs_k_fold', True):
                    break

            return compute_loss(dataset.metric, metric_values)
        except ValueError:
            """ With some hyper-parameters combinations, a ValueError can be raised during training
                (in particular MLPRegressor)
            """
            return {'status': 'fail'}
Пример #22
0
    def _validate(self, updates=0):
        """
        Validate on development set.

        Tags the dev set, computes and logs the dev loss and accuracy, and
        writes the tagged sentences to CSV. When early stopping is enabled,
        tracks the best dev loss: saves the model on improvement, and sets
        self._training_stopped after too many validations without progress.
        """
        tagged_dev_sentences, accuracy = self._tag_dataset(self.dev,
                                                           train_mode=False)

        loss = utils.compute_loss(tagged_dev_sentences, self.dev, 'dev')

        iterator.write(sentences=tagged_dev_sentences,
                       ids=self._dev_iterator.ids,
                       file_name=self._outfile_prefix + '_devset.csv',
                       verbose=True)

        logger.info("Update %r: dev loss/sent=%.4f, acc=%.4f" %
                    (updates, loss, accuracy))

        # Early stop here
        if self._early_stop:
            if loss < self._best_dev_loss:
                logger.info("Dev loss improved to %.4f" % loss)
                self._best_dev_loss = loss
                self._dev_loss_not_improved = 0

                # Save best model
                model_path = self._outfile_prefix + ".m"
                logger.info("Saving best model to '%s'." % model_path)
                self.model.save(model_path)
            else:
                self._dev_loss_not_improved += 1
                if self._dev_loss_not_improved > self._early_stop_patience:
                    logger.info(
                        "Model has not improved for %d validation steps, stopping."
                        % self._dev_loss_not_improved)
                    logger.info("Best dev loss: %.4f" % self._best_dev_loss)
                    self._training_stopped = True
                else:
                    logger.info(
                        "Model has not improved for %d validation steps." %
                        self._dev_loss_not_improved)
Пример #23
0
        def train_step(src_token_ids, tgt_token_ids):
            """Performs a single training step on a minibatch of source and
            target token ids.

            Closure variables: self._model, optimizer, clip_norm.

            Args:
                src_token_ids: int tensor of shape [batch_size, src_seq_len],
                    lists of subtoken ids of batched source sequences ending
                    with EOS_ID and zero-padded.
                tgt_token_ids: int tensor of shape [batch_size, tgt_seq_len],
                    lists of subtoken ids of batched target sequences ending
                    with EOS_ID and zero-padded.

            Returns:
                loss: float scalar tensor, the loss.
                step: int scalar tensor, the global step.
                lr: float scalar tensor, the learning rate.
            """
            with tf.GradientTape() as tape:
                # for each sequence of subtokens s1, s2, ..., sn, 1
                # prepend it with 0 (SOS_ID) and truncate it to the same length:
                # 0, s1, s2, ..., sn
                tgt_token_ids_input = tf.pad(tgt_token_ids,
                                             [[0, 0], [1, 0]])[:, :-1]
                logits = self._model(src_token_ids,
                                     tgt_token_ids_input,
                                     training=True)
                loss = compute_loss(tgt_token_ids, logits,
                                    self._label_smoothing,
                                    self._model._vocab_size)

            gradients = tape.gradient(loss, self._model.trainable_variables)
            # Optional global-norm gradient clipping.
            if clip_norm is not None:
                gradients, norm = tf.clip_by_global_norm(gradients, clip_norm)
            optimizer.apply_gradients(
                zip(gradients, self._model.trainable_variables))

            step = optimizer.iterations
            lr = optimizer.learning_rate(step)
            # `iterations` has already been incremented by apply_gradients,
            # hence step - 1 is the step that was just performed.
            return loss, step - 1, lr
Пример #24
0
def _train(epoch):
    """Train the text and image encoders for one epoch.

    Logs the per-batch loss (indexed by the global `niter` counter), the
    size-weighted epoch loss, and the current learning rate to TensorBoard.
    """
    global niter
    print('=> train')
    TxtEnc.train()
    ImgEnc.train()
    epoch_loss = 0.0
    for recipe in tqdm(train_loader):
        txt_in = recipe[0].to(device)
        img_in = recipe[1].to(device)
        recipe[0], recipe[1] = txt_in, img_in

        loss = compute_loss(TxtEnc(txt_in), ImgEnc(img_in), device)

        optimizer.zero_grad()
        loss.backward()
        # Clip gradients per parameter group before stepping.
        for group in optimizer.param_groups:
            torch.nn.utils.clip_grad_norm_(group['params'], args.grad_clip)
        optimizer.step()

        writer.add_scalar('loss_batch_train', loss.item(), niter)
        # Weight the batch loss by its size so the epoch average is exact.
        epoch_loss += loss.item() * img_in.shape[0]
        niter += 1

    epoch_loss /= len(train_set)
    writer.add_scalar('loss_epoch_train', epoch_loss, epoch)
    writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
Пример #25
0
def run(args):
    """Train a conditional GAN on the Crocodile dataset.

    Builds a generator/discriminator pair according to ``args``, trains with
    the non-saturating GAN loss (plus an optional gradient penalty), and
    writes sample grids, TensorBoard logs and checkpoints under
    ``args.output_path/<resolution>/<experiment name>``.
    """
    if args.slurm:
        args.slurmid = "%s_%s" % (os.environ["SLURM_JOB_ID"],
                                  os.environ["SLURM_ARRAY_TASK_ID"])
    BATCH_SIZE = args.batch_size
    NUM_Z = args.num_latent
    NUM_FILTERS = args.num_filters
    LR_GEN = args.learning_rate_gen
    LR_DIS = args.learning_rate_dis
    NUM_EPOCHS = args.num_epochs
    SEED = args.seed
    RESOLUTION = args.resolution
    GRADIENT_PENALTY = args.gradient_penalty
    torch.manual_seed(SEED)
    ROOT = args.path_to_dataset

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Name the experiment after the SLURM id when available; otherwise use a
    # timestamp plus random suffix. getattr() avoids an AttributeError when
    # --slurm is off and the argparse namespace has no `slurmid` attribute.
    exp_name = "%i_%i" % (int(time.time()), np.random.randint(9999))
    if getattr(args, "slurmid", None) is not None:
        exp_name = args.slurmid
    OUTPUT_PATH = os.path.join(args.output_path,
                               '%i/%s') % (RESOLUTION, exp_name)
    writer = SummaryWriter(log_dir=os.path.join(OUTPUT_PATH, 'runs'))

    print("Loading dataset...")

    # Scale images to [-1, 1] to match the generator's output range.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    dataset = CrocodileDataset(root=ROOT,
                               transform=transform,
                               resolution=RESOLUTION,
                               one_hot=True)
    dataloader = DataLoader(dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=True,
                            num_workers=4)

    print("Init...")

    if args.model == "small":
        gen = models.SmallGenerator(
            NUM_Z + dataset.num_cat,
            RESOLUTION,
            NUM_FILTERS,
            args.num_layers,
            spectral_norm=args.spectral_norm_gen).to(device)
        dis = models.ConditionalSmallDiscriminator(RESOLUTION, dataset.num_cat,
                                                   NUM_FILTERS,
                                                   args.num_layers).to(device)
    else:
        # Fail fast: any other model name previously fell through and raised
        # a NameError on `gen`/`dis` below.
        raise ValueError("Unknown model: %r" % args.model)

    gen_optimizer = optim.Adam(gen.parameters(), lr=LR_GEN, betas=(0.5, 0.999))
    dis_optimizer = optim.Adam(dis.parameters(), lr=LR_DIS, betas=(0.5, 0.999))

    # Fixed evaluation latents: the same 10 noise vectors for every category,
    # each concatenated with the category's one-hot code.
    z_examples = torch.zeros(1, 10,
                             NUM_Z).normal_().expand(dataset.num_cat, -1, -1)
    y_examples = torch.eye(dataset.num_cat).unsqueeze(1).expand(-1, 10, -1)
    z_examples = torch.cat([z_examples, y_examples],
                           -1).view(-1, NUM_Z + dataset.num_cat).to(device)

    if not os.path.exists(os.path.join(OUTPUT_PATH, "gen")):
        os.makedirs(os.path.join(OUTPUT_PATH, "gen"))
    if not os.path.exists(os.path.join(OUTPUT_PATH, "img")):
        os.makedirs(os.path.join(OUTPUT_PATH, "img"))

    # Save a reference grid of real images. `next(dataiter)` replaces the
    # removed `.next()` method, and the [:100] slice is now applied to the
    # image tensor — the old `dataiter.next()[:100]` sliced the
    # (images, labels) pair itself, which was a no-op.
    dataiter = iter(dataloader)
    x_examples, _ = next(dataiter)
    x_examples = x_examples[:100]
    x_examples = x_examples / 2 + 0.5  # back to [0, 1] for saving
    torchvision.utils.save_image(x_examples,
                                 os.path.join(OUTPUT_PATH, "examples.png"),
                                 nrow=10)

    with open(os.path.join(OUTPUT_PATH, 'config.json'), 'w') as f:
        json.dump(vars(args), f)

    print("Training...")
    init_epoch = 0
    for epoch in range(NUM_EPOCHS):
        t = time.time()
        for x, y in dataloader:
            x = x.to(device)
            y = y.to(device)
            z = torch.zeros(len(x), NUM_Z).normal_().to(device)
            z = torch.cat([z, y], -1)

            x_gen = gen(z)
            score_true, score_gen = dis(x, y), dis(x_gen, y)
            loss_gen, loss_dis = utils.compute_loss(score_true,
                                                    score_gen,
                                                    mode="nsgan")
            if GRADIENT_PENALTY:
                loss_dis += GRADIENT_PENALTY * dis.get_penalty(x, x_gen)

            # Compute both gradients before either optimizer steps, so the
            # discriminator update does not see the generator's new weights.
            grad_gen = autograd.grad(loss_gen,
                                     gen.parameters(),
                                     retain_graph=True)
            grad_dis = autograd.grad(loss_dis,
                                     dis.parameters(),
                                     retain_graph=True)

            for p, g in zip(gen.parameters(), grad_gen):
                p.grad = g

            for p, g in zip(dis.parameters(), grad_dis):
                p.grad = g

            gen_optimizer.step()
            dis_optimizer.step()

        print("Epoch: %i, Loss dis: %.2e, Loss gen %.2e, Time: %i" %
              (init_epoch + epoch, loss_dis, loss_gen, time.time() - t))

        x_gen = x_gen / 2 + 0.5
        img = torchvision.utils.make_grid(x_gen, nrow=10)
        writer.add_image('gen_random', img, epoch)

        x_gen = gen(z_examples)
        x_gen = x_gen / 2 + 0.5
        img = torchvision.utils.make_grid(
            x_gen,
            nrow=10)  # First dimension is row, second dimension is column
        writer.add_image('gen', img, epoch)
        torchvision.utils.save_image(
            x_gen,
            os.path.join(OUTPUT_PATH,
                         "img/img_%.3i.png" % (init_epoch + epoch)),
            nrow=10)

        # Lightweight generator-only snapshot, kept per epoch.
        torch.save(
            {
                'epoch': init_epoch + epoch,
                'gen_state_dict': gen.state_dict()
            },
            os.path.join(OUTPUT_PATH, "gen/gen_%i.chk" % (init_epoch + epoch)))

        # Full resumable snapshot of the latest state (overwritten each epoch).
        torch.save(
            {
                'epoch': init_epoch + epoch,
                'gen_state_dict': gen.state_dict(),
                'dis_state_dict': dis.state_dict(),
                'gen_optimizer_state_dict': gen_optimizer.state_dict(),
                'dis_optimizer_state_dict': dis_optimizer.state_dict()
            }, os.path.join(OUTPUT_PATH, "last_model.chk"))
Пример #26
0
def train_loop(
    run_id,
    dataset_dir,
    ckpt_run_dir,
    output_dir,
    validation_only=False,
    use_cuda=False,
    light_target=False,
):
    """Distributed train/eval loop for a GNMT model on WMT16 EN-DE.

    Args:
        run_id: identifier used by the tracker for this run.
        dataset_dir: directory holding (or receiving) the WMT16 data.
        ckpt_run_dir: directory where checkpoints are read/written.
        output_dir: directory for evaluation stats (validation-only mode).
        validation_only: if True, skip training and evaluate existing
            checkpoints epoch by epoch.
        use_cuda: move model, criterion and batches to GPU.
        light_target: if True, stop at BLEU 20 instead of 24.
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    rank = dist.get_rank()
    world_size = dist.get_world_size()

    train_epochs = 8
    train_min_len, train_max_len = 0, 75
    val_min_len, val_max_len = 0, 150
    math_mode = "fp16"  # One of `fp16`, `fp32`
    lang = ("en", "de")

    # Training
    train_global_batch_size = 2048  # Global batch size
    max_bs = 128  # Max batch size for used hardware
    update_freq = int(max(1, train_global_batch_size // (max_bs * world_size)))
    train_batch_size = int(train_global_batch_size // (world_size * update_freq))
    val_batch_size = 64

    # Model attributes
    model_args = {
        "hidden_size": 1024,
        "num_layers": 4,
        "dropout": 0.2,
        "share_embedding": True,
        "fusion": True,
    }

    # Criterion
    criterion_args = {"smoothing": 0.1, "fast_xentropy": True}

    # Loss scaling
    loss_scaling = {"init_scale": 1024, "upscale_interval": 128}

    # Optimizer
    optimizer_args = {
        "lr": 2e-3,
        "grad_clip": 5.0,
    }

    # Scheduler
    scheduler_args = {
        "warmup_steps": 200,
        "remain_steps": 0.4,
        "decay_interval": 0.05,
        "decay_steps": 4,
        "decay_factor": 0.5,
    }

    # Translator
    translator_args = {
        "beam_size": 5,
        "len_norm_factor": 0.6,
        "cov_penalty_factor": 0.1,
        "len_norm_const": 5.0,
        "max_seq_len": 150,
    }

    # Build train/val datsets
    train_set = WMT16Dataset(
        dataset_dir,
        math_precision=math_mode,
        lang=lang,
        train=True,
        download=True,
        preprocessed=True,
        min_len=train_min_len,
        max_len=train_max_len,
    )
    train_set.prepare()
    val_set = WMT16Dataset(
        dataset_dir,
        math_precision=math_mode,
        lang=lang,
        validation=True,
        download=False,
        min_len=val_min_len,
        max_len=val_max_len,
        sort=True,
    )

    tokenizer = train_set.tokenizer

    # Build model
    model = GNMT(vocab_size=train_set.vocab_size, **model_args)

    # Build loss function
    criterion = LabelSmoothing(padding_idx=wmt16_config.PAD, **criterion_args)

    # Bilingual Evaluation Understudy Score
    metrics = [BLEUScore()]

    # Partition data
    train_set = partition_dataset_by_rank(train_set, rank, world_size)
    val_set = partition_dataset_by_rank(val_set, rank, world_size)

    collate_fn = build_collate_fn(sort=True)
    train_loader = DataLoader(
        train_set,
        batch_size=train_batch_size,
        collate_fn=collate_fn,
        num_workers=2,
        pin_memory=True,
        drop_last=False,
        shuffle=True,
    )

    val_loader = DataLoader(
        val_set,
        batch_size=val_batch_size,
        collate_fn=collate_fn,
        num_workers=2,
        pin_memory=True,
        drop_last=False,
    )

    # Validate every ~30% of an epoch, aligned to gradient-update boundaries.
    # max(1, ...) keeps the interval positive for tiny loaders, where the
    # rounding could otherwise yield 0 and crash the `% validate_every` check.
    validate_every = update_freq * max(
        1, round(len(train_loader) * 0.30 / update_freq)
    )  # Validate every 30%

    # Build optimizer & scheduler
    total_train_iters = (len(train_loader) // update_freq) * train_epochs

    print("Number of batches per epoch {}".format(len(train_loader)))
    print("Train iterations per epoch {}".format(total_train_iters / train_epochs))

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    use_horovod = math_mode == "fp16" and dist.get_backend() == dist.Backend.MPI

    if use_horovod:
        hvd.init()
        logger.info("Using horovod rank={}".format(hvd.rank()))
        tensor = torch.tensor([1])
        res = hvd.allreduce(tensor, op=hvd.Sum)
        assert res[0] == world_size

    fp_optimizer, optimizer, model = build_optimizer(
        model=model,
        math=math_mode,
        loss_scaling=loss_scaling,
        use_cuda=use_cuda,
        use_horovod=use_horovod,
        **optimizer_args
    )

    # Create a learning rate scheduler for an optimizer
    scheduler = ExponentialWarmupMultiStepLR(
        optimizer, total_train_iters, **scheduler_args
    )

    # Translator
    translator = Translator(model=model, trg_tokenizer=tokenizer, **translator_args)

    checkpointer = Checkpointer(
        ckpt_run_dir=ckpt_run_dir, rank=rank, freq=CheckpointFreq.BEST
    )

    if not validation_only:

        if light_target:
            goal = task4_time_to_bleu_goal(20)
        else:
            goal = task4_time_to_bleu_goal(24)

        num_batches_per_device_train = len(train_loader)
        tracker = Tracker(metrics, run_id, rank, goal=goal)

        dist.barrier()
        tracker.start()

        for epoch in range(0, train_epochs):
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            model.train()
            tracker.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                tracker.batch_start()
                data, target = prepare_batch(data, target, use_cuda=use_cuda)
                tracker.record_batch_load()

                # batch_idx is zero-based, so the final batch of the epoch is
                # len(train_loader) - 1. The previous `== len(train_loader)`
                # comparison was never true, so a trailing partial
                # accumulation window was never stepped.
                is_last = batch_idx == len(train_loader) - 1
                update = (batch_idx % update_freq) == update_freq - 1
                init = (batch_idx % update_freq) == 0

                # Clear gradients in the optimizer.
                if init:
                    fp_optimizer.zero_grad()
                    tracker.record_batch_init()

                # Compute the output
                output = compute_model_output(model, data, target)
                tracker.record_batch_fwd_pass()

                # Compute the loss
                loss, loss_per_token = compute_loss(
                    data, target, output, criterion, update_freq
                )
                tracker.record_batch_comp_loss()
                # Backprop
                fp_optimizer.backward_loss(loss)
                tracker.record_batch_backprop()

                # Opt step
                if update or is_last:
                    # For this task, simply sum all gradients
                    updated = fp_optimizer.step(tracker=tracker, denom=1)

                    # Learning rate scheduler
                    if updated:
                        scheduler.step()

                tracker.batch_end()

                record_train_batch_stats(
                    batch_idx=batch_idx,
                    loss=loss_per_token,
                    output=target[0],  # Use target just for the size
                    metric_results={},
                    tracker=tracker,
                    num_batches_per_device_train=num_batches_per_device_train,
                )

                # Validation during training
                if (batch_idx + 1) % validate_every == 0:
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()

                    metrics_values, loss = validation_round(
                        val_loader,
                        metrics,
                        model,
                        criterion,
                        update_freq,
                        translator,
                        tracker=tracker,
                        use_cuda=use_cuda,
                    )

                    record_validation_stats(metrics_values, loss, tracker, rank)
                    if tracker.goal_reached:
                        break

                    model.train()
                    tracker.train()

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            metrics_values, loss = validation_round(
                val_loader,
                metrics,
                model,
                criterion,
                update_freq,
                translator,
                use_cuda=use_cuda,
            )

            is_best = record_validation_stats(metrics_values, loss, tracker, rank)

            checkpointer.save(
                tracker,
                model,
                fp_optimizer.optimizer,
                scheduler,
                tracker.current_epoch,
                is_best,
            )

            tracker.epoch_end()

            if tracker.goal_reached:
                print("Goal Reached!")
                dist.barrier()
                time.sleep(10)
                return
    else:
        cecf = CheckpointsEvaluationControlFlow(
            ckpt_dir=ckpt_run_dir,
            rank=rank,
            world_size=world_size,
            checkpointer=checkpointer,
            model=model,
            epochs=train_epochs,
            loss_function=criterion,
            metrics=metrics,
            use_cuda=use_cuda,
            dtype="fp32",
            max_batch_per_epoch=None,
        )

        train_stats = cecf.evaluate_by_epochs(train_loader)
        with open(os.path.join(output_dir, "train_stats.json"), "w") as f:
            json.dump(train_stats, f)

        val_stats = cecf.evaluate_by_epochs(val_loader)
        with open(os.path.join(output_dir, "val_stats.json"), "w") as f:
            json.dump(val_stats, f)
Пример #27
0
def main(args):
    """End-to-end Wave-U-Net source-separation training, validation and test.

    Builds the model from `args`, trains with cyclic LR and early stopping on
    validation loss (patience = args.patience epochs without improvement),
    reloads the best checkpoint, and reports test loss plus SDR/SIR metrics
    to TensorBoard and a pickle file.
    """
    #torch.backends.cudnn.benchmark=True # This makes dilated conv much faster for CuDNN 7.5

    # MODEL
    num_features = [args.features*i for i in range(1, args.levels+1)] if args.feature_growth == "add" else \
                   [args.features*2**i for i in range(0, args.levels)]
    target_outputs = int(args.output_size * args.sr)
    model = Waveunet(args.channels, num_features, args.channels, args.instruments, kernel_size=args.kernel_size,
                     target_output_size=target_outputs, depth=args.depth, strides=args.strides,
                     conv_type=args.conv_type, res=args.res, separate=args.separate)

    if args.cuda:
        model = utils.DataParallel(model)
        print("move model to gpu")
        model.cuda()

    print('model: ', model)
    print('parameter count: ', str(sum(p.numel() for p in model.parameters())))

    writer = SummaryWriter(args.log_dir)

    ### DATASET
    musdb = get_musdb_folds(args.dataset_dir)
    # If not data augmentation, at least crop targets to fit model output shape
    crop_func = partial(crop, shapes=model.shapes)
    # Data augmentation function for training
    augment_func = partial(random_amplify, shapes=model.shapes, min=0.7, max=1.0)
    train_data = SeparationDataset(musdb, "train", args.instruments, args.sr, args.channels, model.shapes, True, args.hdf_dir, audio_transform=augment_func)
    val_data = SeparationDataset(musdb, "val", args.instruments, args.sr, args.channels, model.shapes, False, args.hdf_dir, audio_transform=crop_func)
    test_data = SeparationDataset(musdb, "test", args.instruments, args.sr, args.channels, model.shapes, False, args.hdf_dir, audio_transform=crop_func)

    dataloader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, worker_init_fn=utils.worker_init_fn)

    ##### TRAINING ####

    # Set up the loss function
    if args.loss == "L1":
        criterion = nn.L1Loss()
    elif args.loss == "L2":
        criterion = nn.MSELoss()
    else:
        raise NotImplementedError("Couldn't find this loss!")

    # Set up optimiser
    optimizer = Adam(params=model.parameters(), lr=args.lr)

    # Set up training state dict that will also be saved into checkpoints
    state = {"step" : 0,
             "worse_epochs" : 0,
             "epochs" : 0,
             "best_loss" : np.inf}  # np.Inf alias was removed in NumPy 2.0

    # LOAD MODEL CHECKPOINT IF DESIRED
    if args.load_model is not None:
        print("Continuing training full model from checkpoint " + str(args.load_model))
        state = utils.load_model(model, optimizer, args.load_model)

    print('TRAINING START')
    while state["worse_epochs"] < args.patience:
        print("Training one epoch from iteration " + str(state["step"]))
        avg_time = 0.
        model.train()
        with tqdm(total=len(train_data) // args.batch_size) as pbar:
            np.random.seed()
            for example_num, (x, targets) in enumerate(dataloader):
                if args.cuda:
                    x = x.cuda()
                    for k in list(targets.keys()):
                        targets[k] = targets[k].cuda()

                t = time.time()

                # Set LR for this iteration
                utils.set_cyclic_lr(optimizer, example_num, len(train_data) // args.batch_size, args.cycles, args.min_lr, args.lr)
                writer.add_scalar("lr", utils.get_lr(optimizer), state["step"])

                # Compute loss for each instrument/model
                optimizer.zero_grad()
                outputs, avg_loss = utils.compute_loss(model, x, targets, criterion, compute_grad=True)

                optimizer.step()

                state["step"] += 1

                # Running average of per-batch wall time.
                t = time.time() - t
                avg_time += (1. / float(example_num + 1)) * (t - avg_time)

                writer.add_scalar("train_loss", avg_loss, state["step"])

                if example_num % args.example_freq == 0:
                    input_centre = torch.mean(x[0, :, model.shapes["output_start_frame"]:model.shapes["output_end_frame"]], 0) # Stereo not supported for logs yet
                    writer.add_audio("input", input_centre, state["step"], sample_rate=args.sr)

                    for inst in outputs.keys():
                        writer.add_audio(inst + "_pred", torch.mean(outputs[inst][0], 0), state["step"], sample_rate=args.sr)
                        writer.add_audio(inst + "_target", torch.mean(targets[inst][0], 0), state["step"], sample_rate=args.sr)

                pbar.update(1)

        # VALIDATE
        val_loss = validate(args, model, criterion, val_data)
        print("VALIDATION FINISHED: LOSS: " + str(val_loss))
        writer.add_scalar("val_loss", val_loss, state["step"])

        # EARLY STOPPING CHECK
        checkpoint_path = os.path.join(args.checkpoint_dir, "checkpoint_" + str(state["step"]))
        if val_loss >= state["best_loss"]:
            state["worse_epochs"] += 1
        else:
            print("MODEL IMPROVED ON VALIDATION SET!")
            state["worse_epochs"] = 0
            state["best_loss"] = val_loss
            state["best_checkpoint"] = checkpoint_path

        # CHECKPOINT
        print("Saving model...")
        utils.save_model(model, optimizer, state, checkpoint_path)

        state["epochs"] += 1

    #### TESTING ####
    # Test loss
    print("TESTING")

    # Load best model based on validation loss
    state = utils.load_model(model, None, state["best_checkpoint"])
    test_loss = validate(args, model, criterion, test_data)
    print("TEST FINISHED: LOSS: " + str(test_loss))
    writer.add_scalar("test_loss", test_loss, state["step"])

    # Mir_eval metrics
    test_metrics = evaluate(args, musdb["test"], model, args.instruments)

    # Dump all metrics results into pickle file for later analysis if needed
    with open(os.path.join(args.checkpoint_dir, "results.pkl"), "wb") as f:
        pickle.dump(test_metrics, f)

    # Write most important metrics into Tensorboard log
    avg_SDRs = {inst : np.mean([np.nanmean(song[inst]["SDR"]) for song in test_metrics]) for inst in args.instruments}
    avg_SIRs = {inst : np.mean([np.nanmean(song[inst]["SIR"]) for song in test_metrics]) for inst in args.instruments}
    for inst in args.instruments:
        writer.add_scalar("test_SDR_" + inst, avg_SDRs[inst], state["step"])
        writer.add_scalar("test_SIR_" + inst, avg_SIRs[inst], state["step"])
    overall_SDR = np.mean(list(avg_SDRs.values()))
    # Pass the step so this scalar lines up with the other test metrics
    # (it was previously logged without a global_step).
    writer.add_scalar("test_SDR", overall_SDR, state["step"])
    print("SDR: " + str(overall_SDR))

    writer.close()
Пример #28
0
    # NOTE(review): headerless fragment of a larger routine — it reads
    # `envs`, `rep_model`, `linear_predictor`, `trainer`, `flags`,
    # `rep_steps` and `device` from the enclosing (unseen) scope.

    # Extract per-environment representations plus their pooled union under
    # the current representation model.
    x_rep_combined, y_combined, x_rep_list, y_list = utils.get_rep(
        envs, rep_model)

    # Per-feature "credit" and absolute predictor weights, computed with
    # ground-truth access ("cheating") — see the comment near the bottom.
    credit_before, absolute_weights = trainer.compute_variance_cheating(
        flags["l1_penalty"], linear_predictor, x_rep_list, y_list,
        x_rep_combined, y_combined, device)

    # Locate the highest-credit feature and the feature with the smallest
    # linear-predictor weight magnitude.
    magnitude = list(linear_predictor.parameters())[0]
    max_v_arg = torch.argmax(credit_before.squeeze())
    min_mag_arg = torch.argmin(magnitude.squeeze())

    max_v_val_before = credit_before.squeeze()[max_v_arg]
    min_mag_val_before = magnitude.squeeze()[min_mag_arg]
    # Collapse the credit vector into a scalar (sum of squared magnitudes).
    credit_before = torch.sum(torch.abs(credit_before)**2)

    loss_before = utils.compute_loss(linear_predictor, x_rep_combined,
                                     y_combined)

    # Every 10th representation step, perturb an entire layer; otherwise
    # perturb only the lowest-magnitude feature. Scale is uniform in [0, 0.2).
    if rep_steps % 10 == 0:
        rep_model.perturb_layer(0, 0.2 * random.random())
        credit_before_real = copy.deepcopy(credit_before)
    else:
        rep_model.perturb_feature(min_mag_arg, 0.2 * random.random())

    # Re-extract representations after the perturbation.
    x_rep_combined, y_combined, x_rep_list, y_list = utils.get_rep(
        envs, rep_model)

    # Cheating to speed up the experiment; we could compute this online, but that would take longer.
    credit_after, weights_after = trainer.compute_variance_cheating(
        flags["l1_penalty"], linear_predictor, x_rep_list, y_list,
        x_rep_combined, y_combined, device)
Пример #29
0
def train(epoch,
          model,
          train_loader,
          optimizer,
          writer,
          sigma_0,
          lr_sigma,
          iters_sig,
          gaussian_num=1,
          lamda=0.0,
          gamma=0.0,
          gaussian_num_ds=1,
          num_classes=1000):
    """Train `model` for one epoch with per-sample adaptive Gaussian noise.

    For each batch: optimize a per-example noise scale via `get_sigma`,
    persist it into `sigma_0`, then train on Monte-Carlo noise-corrupted
    inputs with the base loss plus a MACER-style robustness term.

    Args:
        epoch: current epoch index (for logging).
        model: classifier to train.
        train_loader: yields (batch, targets, idx); `idx` indexes `sigma_0`.
        optimizer: optimizer over `model`'s parameters.
        writer: TensorBoard SummaryWriter.
        sigma_0: tensor of per-example sigmas, updated in place per batch.
        lr_sigma: learning rate for the sigma optimization.
        iters_sig: iteration count for the sigma optimization.
        gaussian_num: Monte-Carlo samples per example for the training loss.
        lamda: weight of the MACER loss term.
        gamma: margin parameter of the MACER loss term.
        gaussian_num_ds: Monte-Carlo samples used when optimizing sigma.
        num_classes: classifier output dimension (default 1000 = ImageNet;
            previously hard-coded).

    Returns:
        The updated `sigma_0` tensor.
    """
    model = model.train()
    train_loss = 0
    total = 0
    correct = 0
    # CE_loss = nn.CrossEntropyLoss()
    for batch_idx, (batch, targets, idx) in enumerate(train_loader):
        optimizer.zero_grad()

        batch_size = len(idx)
        batch = batch.to(device)
        targets = targets.to(device)

        # Optimize the per-example noise scale for this batch.
        # model.eval()
        sigma, _ = get_sigma(model,
                             batch,
                             lr_sigma,
                             sigma_0[idx],
                             iters_sig,
                             device,
                             gaussian_num=gaussian_num_ds)
        # model.train()
        sigma_0[idx] = sigma  # updating sigma

        # Repeat each input `gaussian_num` times for the Monte-Carlo estimate
        # of the MACER loss.
        new_shape = [batch_size * gaussian_num]
        new_shape.extend(batch[0].shape)
        batch = batch.repeat((1, gaussian_num, 1, 1)).view(new_shape)
        # Repeat sigmas to match, one scale per corrupted copy.
        sigma_repeated = sigma.repeat(
            (1, gaussian_num, 1, 1)).view(-1, 1, 1, 1)
        noise = torch.randn_like(batch) * sigma_repeated

        batch_corrupted = batch + noise

        # Average class scores over the Monte-Carlo copies of each example.
        outputs_softmax = model(batch_corrupted).reshape(
            batch_size, gaussian_num, num_classes).mean(1)
        # clean_output = model(batch)

        total_loss = compute_loss(outputs_softmax, targets)
        if torch.isnan(outputs_softmax).any() or torch.isnan(total_loss).any():
            # Surface numerical failures with an actionable message.
            print('WARNING: NaN in model outputs/loss at epoch %d, batch %d' %
                  (epoch, batch_idx))
        total_loss += lamda * macer_loss(outputs_softmax, targets, sigma,
                                         gamma)
        # clean_loss = compute_loss(clean_output, targets)
        # total_loss += clean_loss

        train_loss += total_loss.item() * len(batch)
        _, predicted = outputs_softmax.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # update parameters
        total_loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print(
                '+ Epoch: {}. Iter: [{}/{} ({:.0f}%)]. Loss: {}. Accuracy: {}'.
                format(epoch, batch_idx * len(batch),
                       len(train_loader.dataset),
                       100. * batch_idx / len(train_loader),
                       train_loss / total, 100. * correct / total))

    # Log a clean-vs-corrupted sample grid from the final batch.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)

    writer.add_figure('sample of noisy trained examples', fig, epoch)
    writer.add_scalar('loss/train_loss', train_loss / total, epoch)
    writer.add_scalar('accuracy/train_accuracy', 100. * correct / total, epoch)
    writer.add_scalar('sigma/train_sigma_mean', sigma_0.mean().item(), epoch)
    writer.add_scalar('sigma/train_sigma_min', sigma_0.min().item(), epoch)
    writer.add_scalar('sigma/train_sigma_max', sigma_0.max().item(), epoch)

    return sigma_0
Пример #30
0
def learner(args):
    comm_cross = global_dict['comm_cross']
    hvd.init(comm=comm_cross)
    torch.cuda.set_device(hvd.local_rank())
    env = wrap_atari_dqn(make_atari(args['env']), args)
    # utils.set_global_seeds(args['seed'], use_torch=True)

    device = args['device']
    model = DuelingDQN(env, args).to(device)
    if os.path.exists('model.pth'):
        # model.load_state_dict(torch.load('model.pth'))
        pass

    tgt_model = DuelingDQN(env, args).to(device)
    del env

    writer = SummaryWriter(log_dir=os.path.join(
        args['log_dir'], f'{global_dict["unit_idx"]}-learner'))
    # optimizer = torch.optim.SGD(model.parameters(), 1e-5 * args['num_units'], momentum=0.8)
    # optimizer = torch.optim.RMSprop(model.parameters(), args['lr'], alpha=0.95, eps=1.5e-7, centered=True)
    optimizer = torch.optim.Adam(model.parameters(),
                                 args['lr'] * args['num_units'])
    optimizer = hvd.DistributedOptimizer(
        optimizer, named_parameters=model.named_parameters())
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    tgt_model.load_state_dict(model.state_dict())
    if args['dynamic_gradient_clip']:
        grad_norm_running_mean = args['gradient_norm_running_mean']
        grad_norm_lambda = args['gradient_norm_lambda']

    batch_queue = queue.Queue(maxsize=3)
    prios_queue = queue.Queue(maxsize=4)
    param_queue = queue.Queue(maxsize=3)
    threading.Thread(target=recv_batch, args=(batch_queue, )).start()
    threading.Thread(target=send_prios, args=(prios_queue, )).start()
    threading.Thread(target=send_param, args=(param_queue, )).start()
    if global_dict['unit_idx'] == 0:
        threading.Thread(target=send_param_evaluator,
                         args=(param_queue, )).start()

    prefetcher = data_prefetcher(batch_queue, args['cuda'])

    learn_idx = 0
    ts = time.time()
    tb_dict = {
        k: []
        for k in [
            'loss', 'grad_norm', 'max_q', 'mean_q', 'min_q',
            'batch_queue_size', 'prios_queue_size'
        ]
    }
    first_rount = True
    while True:
        (*batch, idxes) = prefetcher.next()
        if first_rount:
            print("start training")
            sys.stdout.flush()
            first_rount = False
        loss, prios, q_values = utils.compute_loss(model, tgt_model, batch,
                                                   args['n_steps'],
                                                   args['gamma'])

        optimizer.zero_grad()
        loss.backward()
        if args['dynamic_gradient_clip']:
            grad_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(),
                grad_norm_running_mean * args['clipping_threshold'])
            grad_norm_running_mean = grad_norm_running_mean * grad_norm_lambda + \
                min(grad_norm, grad_norm_running_mean * args['clipping_threshold']) * (1-grad_norm_lambda)
        else:
            grad_norm = torch.norm(
                torch.stack([
                    torch.norm(p.grad.detach(), 2) for p in model.parameters()
                ]), 2)
        # global_prios_sum = np.array(prios_sum)
        # comm_cross.Allreduce(MPI.IN_PLACE, global_prios_sum.data)
        # global_prios_sum = float(global_prios_sum)
        # scale = prios_sum / global_prios_sum
        if args['dynamic_gradient_clip'] and args[
                'dropping_threshold'] and grad_norm > grad_norm_running_mean * args[
                    'dropping_threshold']:
            pass
        else:
            optimizer.step()

        prios_queue.put((idxes, prios))
        learn_idx += 1
        tb_dict["loss"].append(float(loss))
        tb_dict["grad_norm"].append(float(grad_norm))
        tb_dict["max_q"].append(float(torch.max(q_values)))
        tb_dict["mean_q"].append(float(torch.mean(q_values)))
        tb_dict["min_q"].append(float(torch.min(q_values)))
        tb_dict["batch_queue_size"].append(batch_queue.qsize())
        tb_dict["prios_queue_size"].append(prios_queue.qsize())

        if learn_idx % args['target_update_interval'] == 0:
            tgt_model.load_state_dict(model.state_dict())
        if learn_idx % args['save_interval'] == 0 and global_dict[
                'unit_idx'] == 0:
            torch.save(model.state_dict(), "model.pth")
        if learn_idx % args['publish_param_interval'] == 0:
            param_queue.put(model.state_dict())
        if learn_idx % args['tb_interval'] == 0:
            bps = args['tb_interval'] / (time.time() - ts)
            for i, (k, v) in enumerate(tb_dict.items()):
                writer.add_scalar(f'learner/{i+1}_{k}', np.mean(v), learn_idx)
                v.clear()
            writer.add_scalar(f"learner/{i+2}_BPS", bps, learn_idx)
            ts = time.time()