Example #1
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts):
    # vec: (batch_size, graph_size, embed_dim)
    # bl_val: (batch_size) or None
    vec, bl_val = baseline.unwrap_batch(batch)
    vec = move_to(vec, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(vec)
    # normalize cost
    # cost = (cost - cost.mean()) / (cost.std() + 1e-5)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(vec, cost) if bl_val is None else (bl_val,
                                                                       0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
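All of the variants on this page call a handful of helpers that are not shown here (move_to, clip_grad_norms, log_values, and the baseline object). For reference, a minimal sketch of what clip_grad_norms might look like, assuming it simply wraps torch.nn.utils.clip_grad_norm_ once per optimizer parameter group and returns the norms for logging (the exact return structure is an assumption, not taken from any one project above):

import math

import torch


def clip_grad_norms(param_groups, max_norm=math.inf):
    # Clip the gradients of each parameter group and collect the total norms.
    # A non-positive max_norm disables clipping but still reports the norms.
    grad_norms = [
        torch.nn.utils.clip_grad_norm_(
            group['params'],
            max_norm if max_norm > 0 else math.inf,
            norm_type=2,
        )
        for group in param_groups
    ]
    grad_norms_clipped = (
        [min(g_norm, max_norm) for g_norm in grad_norms]
        if max_norm > 0 else grad_norms
    )
    return grad_norms, grad_norms_clipped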
Example #2
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts, extra):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood, entropy = model(x)
    # print('Cost on train', cost)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
    if bl_loss == True and opts.baseline == 'constant':
        extra["updates"] += 1

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    entropy_loss = entropy.mean()
    loss = reinforce_loss + bl_loss - opts.entropy * entropy_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, entropy_loss, tb_logger, opts,
                   extra)
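The baseline object used by every example follows a small two-method interface: unwrap_batch splits a batch into the instance data and an optional precomputed baseline value, and eval returns a baseline value together with an auxiliary loss that is non-zero only for a learned critic. A hedged sketch of an exponential moving-average baseline under that interface (class name and default decay are assumptions):

class ExponentialBaseline:
    # Exponential moving average of the mean batch cost, used as a REINFORCE baseline.

    def __init__(self, beta=0.8):
        self.beta = beta
        self.value = None  # running average of the mean cost

    def unwrap_batch(self, batch):
        # No baseline value is stored alongside the data for this baseline.
        return batch, None

    def eval(self, x, cost):
        if self.value is None:
            value = cost.mean()
        else:
            value = self.beta * self.value + (1.0 - self.beta) * cost.mean()
        self.value = value.detach()  # never backpropagate through the baseline
        # The second element is the baseline ("critic") loss; it is zero here,
        # so loss = reinforce_loss + bl_loss reduces to the plain REINFORCE loss.
        return self.value, 0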
Example #3
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    if opts.input_transform is not None:
        cost, log_likelihood, transform_loss = model(
            x, return_transform_loss=True)
    else:
        cost, log_likelihood = model(x)
    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss
    if opts.input_transform is not None:
        loss += opts.input_transform * transform_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
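move_to is likewise a small utility; a plausible sketch, assuming it only needs to handle tensors and (possibly nested) dicts of tensors:

def move_to(var, device):
    # Recursively move a tensor, or a dict of tensors, to the given device.
    if isinstance(var, dict):
        return {k: move_to(v, device) for k, v in var.items()}
    return var.to(device)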
Example #4
File: train.py  Project: GuyLor/DirPG-TSP
def train_batch(model, optimizer, baseline, epoch, batch_id, step,
                interactions_count, batch, tb_logger, opts):
    start_time = time.time()
    get_inner_model(model).decoder.count_interactions = True
    get_inner_model(baseline.baseline.model).decoder.count_interactions = True
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities

    cost, log_likelihood = model(x, only_encoder=False)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)
    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    global sum_batch_time
    sum_batch_time += (time.time() - start_time)
    # Logging
    interactions_so_far = interactions_count +\
                          2*get_inner_model(model).decoder.interactions*torch.cuda.device_count()

    if step % int(opts.log_step) == 0:
        #with torch.no_grad():
        #    opt_nll = get_inner_model(model).supervised_log_likelihood(x)
        opt_nll = torch.ones(1)
        log_values(cost, grad_norms, epoch, interactions_so_far, batch_id,
                   step, log_likelihood, opt_nll, reinforce_loss, bl_loss,
                   tb_logger, opts)

        #interactions_so_far = 2 * step * opts.graph_size * opts.batch_size \
        #    if opts.baseline is not None else step * opts.graph_size * opts.batch_size

        if opts.total_interactions > interactions_so_far:
            avg_batch_time = sum_batch_time / step if step > 0 else sum_batch_time
            print('batch time: ', avg_batch_time)

            seconds = (opts.total_interactions -
                       interactions_so_far) * avg_batch_time
            #seconds = (opts.n_epochs*(opts.epoch_size // opts.batch_size) - step) * avg_batch_time
            GetTime(seconds)
            print('============================')

    get_inner_model(model).decoder.count_interactions = False
    get_inner_model(baseline.baseline.model).decoder.count_interactions = False

    if opts.use_cuda:
        torch.cuda.empty_cache()
Example #5
def train_batch(
        model,
        optimizer,
        baseline,
        epoch,
        batch_id,
        step,
        batch,
        tb_logger,
        opts
):
    x, bl_val = baseline.unwrap_batch(batch)  # x holds the states/nodes (obs in vpg.py)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None
    
    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x)  # log_likelihood comes from the policy distribution used to pick actions/paths; cost is a vector of tour times (these could serve as rewards). Check _compute_policy_entropy in vpg.py

    eps = {
        'padded_observations': x,
        'padded_rewards': cost,
        'lengths': 1,
        'observations': x,
        'rewards': cost,
        'actions': x,  # value missing in the original; filled with the observations, following the fuller variant at the end of this page
    }
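    # Note: eps mirrors garage's episode-batch format but is not used further in
    # this variant; the fuller variant at the end of this page builds the same
    # dictionary completely and passes it to VPG._train_once.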

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)
Example #6
def train_batch(model, optimizer, scaler, baseline, epoch, batch_id, step,
                batch, opts):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    if scaler is not None:
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            cost, log_likelihood = model(x)
            bl_val, bl_loss = baseline.eval(
                x, cost) if bl_val is None else (bl_val, 0)
            reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
            loss = reinforce_loss + bl_loss
        scaler.scale(loss).backward()
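        # Unscale gradients before clipping so the norms are computed on the
        # true (unscaled) gradients; scaler.step() below skips the parameter
        # update if any gradient contains infs or NaNs.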
        scaler.unscale_(optimizer)
        grad_norms = clip_grad_norms(optimizer.param_groups,
                                     opts.max_grad_norm)
        scaler.step(optimizer)
        scaler.update()

    else:
        # Evaluate model, get costs and log probabilities
        cost, log_likelihood = model(x)

        # Evaluate baseline, get baseline loss if any (only for critic)
        bl_val, bl_loss = baseline.eval(
            x, cost) if bl_val is None else (bl_val, 0)

        # Calculate loss
        reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
        loss = reinforce_loss + bl_loss

        # Perform backward pass and optimization step
        optimizer.zero_grad()
        loss.backward()
        # Clip gradient norms and get (clipped) gradient norms for logging
        grad_norms = clip_grad_norms(optimizer.param_groups,
                                     opts.max_grad_norm)
        optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0 and torch.distributed.get_rank() == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, opts)
Example #7
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts):
    x, bl_val = baseline.unwrap_batch(batch)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    cost, log_likelihood = model(x)

    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    optimizer.zero_grad()
    loss.backward()

    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
Example #8
def train_batch(model, optimizer, baseline, epoch, batch_id, step, batch,
                tb_logger, opts):
    # Unwrap baseline
    bat, bl_val = baseline.unwrap_batch(batch)

    # Optionally move Tensors to GPU
    x = move_to(bat['nodes'], opts.device)
    graph = move_to(bat['graph'], opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None

    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x, graph)

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, graph,
                                    cost) if bl_val is None else (bl_val, 0)

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Normalize loss for gradient accumulation
    loss = loss / opts.accumulation_steps
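    # The division makes the accumulated gradient equal to that of a single
    # batch of batch_size * accumulation_steps samples; note that clipping
    # below still runs every call, i.e. on partially accumulated gradients.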

    # Perform backward pass
    loss.backward()

    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)

    # Perform optimization step after accumulating gradients
    if step % opts.accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step, log_likelihood,
                   reinforce_loss, bl_loss, tb_logger, opts)
Example #9
def train(model, optimizer, train_dataset, val_dataset, tb_logger, opts):
    training_dataloader = DataLoader(dataset=train_dataset,
                                     batch_size=opts.batch_size,
                                     shuffle=True,
                                     num_workers=16)
    step = 0
    model.train()
    for epoch in tqdm(range(opts.n_epochs)):
        running_loss = 0.0
        running_accu = 0.0
        for train_batch in training_dataloader:
            train_batch.to(opts.device)
            start_time = time.time()

            optimizer.zero_grad()
            train_batch_out, _ = model(train_batch, compute_embeddings=False)

            # regression loss (MSE)
            # train_batch_loss = F.mse_loss(train_batch_out, train_batch.y)

            # classification code by binarizing score
            # train_batch_y = torch.tensor(train_batch.y > 0.0)
            # train_batch_y.to(opts.device)
            # train_batch_y = train_batch_y.type(torch.cuda.FloatTensor)
            # train_batch_loss = F.binary_cross_entropy(train_batch_out, train_batch_y)
            # train_batch_accu = accuracy(output=train_batch_out, target=train_batch_y, threshold=0.5)
            # classification code

            # node classification by binary labels
            train_batch_loss = F.binary_cross_entropy(train_batch_out,
                                                      train_batch.y)
            train_batch_accu = accuracy(output=train_batch_out,
                                        target=train_batch.y,
                                        threshold=0.5)
            train_batch_loss.backward()
            optimizer.step()

            running_accu += train_batch_accu.item()
            running_loss += train_batch_loss.item()

        # train loss at each epoch
        train_loss = running_loss / len(training_dataloader)

        # train accuracy at each epoch
        train_accu = running_accu / len(training_dataloader) * 100

        # val loss and accuracy at each epoch
        val_loss, val_accu = evaluate(model, val_dataset, opts)

        # Logging
        if step % int(opts.log_step) == 0:
            # wandb logging
            wandb.log({
                "train_loss": train_loss,
                "val_loss": val_loss,
                "train_accu": train_accu,
                "val_accu": val_accu
            })

            # Tensorboard logging
            log_values(epoch, step, tb_logger, opts, train_loss, val_loss)

        step += 1
        epoch_duration = time.time() - start_time
        print("Finished epoch {}, took {} s".format(
            epoch, time.strftime('%H:%M:%S', time.gmtime(epoch_duration))))

    return "Training Completed!"
def train_batch(
        model,
        optimizer,
        baseline,
        epoch,
        batch_id,
        step,
        batch,
        tb_logger,
        opts
):
    x, bl_val = baseline.unwrap_batch(batch)  # x holds the states/nodes (obs in vpg.py)
    x = move_to(x, opts.device)
    bl_val = move_to(bl_val, opts.device) if bl_val is not None else None
    
    # Evaluate model, get costs and log probabilities
    cost, log_likelihood = model(x)  # log_likelihood comes from the policy distribution used to pick actions/paths; cost is a vector of tour times (these could serve as rewards). Check _compute_policy_entropy in vpg.py
    
    #Check sizes
    print('---Checking data Sizes---')
    for key_x in x:
        print('Batch ID:', batch_id, '->', key_x, '->', x[key_x].shape)

    print('Batch ID:',batch_id,'-> Cost ->',cost.shape)

    #Synthetic construction of required Garage input
    obs_garage =  x['loc'].clone().detach().cpu()
    rewards_garage = cost.clone().detach().cpu().numpy()
    #padded_rewards_garage = pad_tensor(rewards_garage, len(rewards_garage), mode='last')
  
    padded_rewards_garage = rewards_garage.reshape(rewards_garage.shape[0],1)

    lens = [(obs_garage.shape[1]-1) for i in range(obs_garage.shape[0])]
    
    eps_dict = {
        'padded_observations': obs_garage,
        'padded_rewards': padded_rewards_garage,
        'lengths': lens,
        'observations': obs_garage,
        'rewards': rewards_garage,
        'actions': obs_garage
    }

    eps = SimpleNamespace(**eps_dict)

    env_spec_dict = {
      'max_episode_length': 20
    }
    env_spec = SimpleNamespace(**env_spec_dict)

    vpg = VPG(
      env_spec = env_spec,
      policy = None,
      value_function = None,
      sampler = None,
      policy_optimizer = optimizer,
      vf_optimizer = optimizer
    )
    print('VPG run' + str(vpg._train_once(1, eps)))
    

    # Evaluate baseline, get baseline loss if any (only for critic)
    bl_val, bl_loss = baseline.eval(x, cost) if bl_val is None else (bl_val, 0)

    #print('VPG run loss: ' + str(vpg._compute_advantage(cost, lens, bl_val)))

    # Calculate loss
    reinforce_loss = ((cost - bl_val) * log_likelihood).mean()
    loss = reinforce_loss + bl_loss

    # Perform backward pass and optimization step
    optimizer.zero_grad()
    loss.backward()
    # Clip gradient norms and get (clipped) gradient norms for logging
    grad_norms = clip_grad_norms(optimizer.param_groups, opts.max_grad_norm)
    optimizer.step()

    # Logging
    if step % int(opts.log_step) == 0:
        log_values(cost, grad_norms, epoch, batch_id, step,
                   log_likelihood, reinforce_loss, bl_loss, tb_logger, opts)