def one_model_evaluation(self, model, epsilon, exclude_wrong_predictions,
                             targeted, true_labels, all_preds, entropies):
        adv_acc = []
        ood_entropies = np.zeros(0)

        for im, crit in self.test_loader:
            adv_results, predictions = construct_adversarial_examples(
                im, crit, self.method, model, self.device, epsilon,
                exclude_wrong_predictions, targeted)
            _, advs, _ = adv_results

            advs = advs.cpu()
            new_x = []
            for image in advs:
                if image.dim() > 3:
                    image = self.transform(image.squeeze(0)).unsqueeze(0)
                else:
                    image = self.transform(image).unsqueeze(0)
                new_x.append(image)
            advs = torch.cat(new_x)

            advs = advs.to(self.device)

            out = model(advs)
            probs = F.softmax(out, dim=-1)
            preds, _ = torch.max(probs, dim=-1)

            # adversarial accuracy on this batch
            adv_acc.append(
                (out.argmax(dim=-1).cpu().flatten() == crit).float().sum().item()
                / len(im))

            # predictive entropy of the adversarial batch
            entropy = Categorical(probs).entropy().squeeze()
            ood_entropies = np.concatenate(
                (ood_entropies, entropy.detach().cpu().numpy()))
            entropies = np.concatenate(
                (entropies, entropy.detach().cpu().numpy()))

            # adversarial examples count as the negative (label 0) class
            true_labels = np.concatenate((true_labels, np.zeros(len(advs))))
            all_preds = np.concatenate(
                (all_preds, preds.detach().cpu().reshape((-1))))

        auroc = calculate_auroc(true_labels, all_preds)
        aupr = calculate_aupr(true_labels, all_preds)

        auroc_entropy = calculate_auroc(1 - true_labels, entropies)
        aupr_entropy = calculate_aupr(1 - true_labels, entropies)

        return (np.mean(adv_acc), auroc, aupr, auroc_entropy, aupr_entropy,
                np.mean(ood_entropies))
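
For context, a minimal self-contained sketch of the entropy-based OOD scoring pattern used above. The helpers calculate_auroc / calculate_aupr are not shown in this snippet, so scikit-learn's roc_auc_score / average_precision_score are used as stand-ins, and entropy_scores is an illustrative name (both are assumptions, not the project's code):

# Hypothetical sketch: score each input by the entropy of the predictive
# distribution and evaluate separability with AUROC / AUPR.
import torch
import torch.nn.functional as F
from torch.distributions import Categorical
from sklearn.metrics import roc_auc_score, average_precision_score

def entropy_scores(model, loader, device='cpu'):
    model.eval()
    scores = []
    with torch.no_grad():
        for x, _ in loader:
            probs = F.softmax(model(x.to(device)), dim=-1)
            scores.append(Categorical(probs=probs).entropy().cpu())
    return torch.cat(scores).numpy()

# labels: 1 = in-distribution, 0 = adversarial/OOD. Higher entropy should flag
# the OOD class, hence the label flip (1 - labels), mirroring the code above.
# auroc = roc_auc_score(1 - labels, scores)
# aupr = average_precision_score(1 - labels, scores)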
Example #2
def forward(self, state_rep):
    logits = self.net(state_rep)
    log_probs = F.log_softmax(logits, dim=-1)
    probs = torch.exp(log_probs)
    arg = Categorical(probs).sample()
    arg = arg.detach().cpu().numpy()
    return arg.reshape(-1, 1), log_probs[range(len(arg)), arg], log_probs
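
A side note on the manual indexing log_probs[range(len(arg)), arg] used in this example: it is equivalent to asking the Categorical distribution for the log-probability of the sampled action, as this small standalone sketch (new code, toy values) shows:

import torch
from torch.distributions import Categorical

logits = torch.randn(4, 6)                 # toy (batch_size, n_actions) logits
dist = Categorical(logits=logits)
action = dist.sample()                     # (batch_size,)
log_prob = dist.log_prob(action)           # log pi(action | state)

manual = torch.log_softmax(logits, dim=-1)[torch.arange(4), action]
assert torch.allclose(log_prob, manual)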
Example #3
def forward(self, state_rep):
    """
    Input
    -----
    state_rep: (batch_size, n_features)

    Returns
    -------
    arg: (batch_size, n_args)
    log_prob: (batch_size, n_args)
    """
    logits = self.net(state_rep).view(
        -1, self.n_args, self.max_size)  # (batch_size, n_args, max_size)
    # Move the size mask onto the same device as the logits
    self.sizes_mask = self.sizes_mask.to(logits.device)
    log_probs = F.log_softmax(logits.masked_fill(self.sizes_mask.bool(),
                                                 float('-inf')),
                              dim=-1)
    probs = torch.exp(log_probs)
    arg = Categorical(probs).sample()  # (batch_size, n_args)
    log_prob = log_probs.view(-1, self.max_size)[
        torch.arange(arg.shape[0] * arg.shape[1]), arg.flatten()
    ].view(arg.shape[0], arg.shape[1])
    arg = arg.detach().cpu().numpy()
    return arg, log_prob
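
The masked_fill(..., float('-inf')) trick above is what restricts sampling to the valid argument sizes; a small standalone illustration (new code, assumed toy shapes):

import torch
import torch.nn.functional as F
from torch.distributions import Categorical

logits = torch.randn(2, 3, 5)                              # (batch, n_args, max_size)
invalid = torch.tensor([0, 0, 0, 1, 1], dtype=torch.bool)  # True = slot not allowed
log_probs = F.log_softmax(logits.masked_fill(invalid, float('-inf')), dim=-1)
sample = Categorical(probs=log_probs.exp()).sample()       # (batch, n_args)
assert (sample < 3).all()                                  # masked slots are never drawn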
Example #4
def actor_step(self, env_output):
    spatial_state = env_output['spatial_state'].unsqueeze(0).to(self.device)
    player_state = env_output['player_state'].unsqueeze(0).to(self.device)
    action_mask = env_output['action_mask'].to(self.device)
    #print("action_mask: ", action_mask)
    log_probs, spatial_features, nonspatial_features = self.pi(spatial_state, player_state, action_mask)
    #print("log_probs: ", log_probs)
    probs = torch.exp(log_probs)
    #print("probs: ", probs)
    main_action_torch = Categorical(probs).sample()  # check probs < 0?!
    main_action = main_action_torch.detach().cpu().numpy()
    log_prob = log_probs[range(len(main_action)), main_action]

    args, args_log_prob, args_indexes = self.sample_params(nonspatial_features, spatial_features, main_action)
    assert args_log_prob.shape == log_prob.shape, ("Shape mismatch between arg_log_prob and log_prob ",
                                                   args_log_prob.shape, log_prob.shape)
    log_prob = log_prob + args_log_prob

    action_id = np.array([self.action_table[act] for act in main_action])
    sc2_env_action = [sc_actions.FunctionCall(action_id[i], args[i]) for i in range(len(action_id))]

    actor_output = {'log_prob': log_prob.flatten(),
                    'main_action': main_action_torch.flatten(),
                    'sc_env_action': sc2_env_action,
                    **args_indexes}  # args_indexes = {'categorical_args_indexes', 'spatial_args_indexes'}

    return actor_output
Example #5
def selfplay_batch_a2c(objs, l_opt, listener, s_opt, speaker, value_coef,
                       ent_coef):
    """ Use a learnt value function """
    # Generate batch
    a2c_info = speaker.a2c(objs)
    oh_msgs = listener.one_hot(a2c_info['msgs'])
    l_logits = listener.get_logits(oh_msgs)

    # Train listener
    l_logprobs = Categorical(logits=l_logits).log_prob(objs)
    l_logprobs = l_logprobs.sum(-1)
    l_opt.zero_grad()
    (-l_logprobs.mean()).backward(retain_graph=True)
    l_opt.step()

    # Policy gradient
    rewards = l_logprobs.detach()
    v_loss = torch.mean((a2c_info['values'] - rewards[:, None]).pow(2))

    adv = (rewards[:, None] - a2c_info['values']).detach()
    reinforce = adv * a2c_info['logprobs']
    p_loss = -reinforce.mean()

    ent_loss = -a2c_info['ents'].mean()

    s_opt.zero_grad()
    (p_loss + value_coef * v_loss + ent_coef * ent_loss).backward()
    s_opt.step()
Example #6
def forward(self, state_rep):
    log_probs = self.get_log_probs(state_rep)
    probs = torch.exp(log_probs)
    torch_arg = Categorical(probs).sample()  # (batch_size, n_args)
    log_prob = log_probs.view(-1, self.max_size)[
        torch.arange(torch_arg.shape[0] * torch_arg.shape[1]), torch_arg.flatten()
    ].view(torch_arg.shape[0], torch_arg.shape[1])
    arg = torch_arg.detach().cpu().numpy()
    return arg, log_prob, torch_arg
Example #7
def inspection_step(agent, inspector, state, action_mask):
    spatial_state = state['spatial']
    player_state = state['player']
    spatial_state = torch.from_numpy(spatial_state).float().to(agent.device)
    player_state = torch.from_numpy(player_state).float().to(agent.device)
    action_mask = torch.tensor(action_mask).to(agent.device)

    log_probs, spatial_features, nonspatial_features = agent.AC.pi(
        spatial_state, player_state, action_mask)
    entropy = agent.compute_entropy(log_probs)
    probs = torch.exp(log_probs)
    a = Categorical(probs).sample()
    a = a.detach().cpu().numpy()
    log_prob = log_probs[range(len(a)), a]

    ### Inspection ###
    step_dict = {}
    p = probs.detach().cpu().numpy()
    step_dict['action_distr'] = p
    step_dict['action_sel'] = a

    # Choose top 5 actions from the probabilities - check about the batch dim
    top_5 = np.argsort(p)[:, -5:]
    top_5_actions = np.array(top_5[:, ::-1])[
        0]  # some issues in accessing p if I don't call np.array()
    step_dict['top_5_actions'] = top_5_actions

    # Save SPATIAL distributions only of the top 5 actions + THEIR NAMES
    with torch.no_grad():
        _, _, log_probs = agent.AC.spatial_params_net(spatial_features)
        log_probs = log_probs.detach().cpu().numpy()[
            0]  # batch dim 1 during inspection
        step_dict['top_5_action_distr'] = {}
        for act in top_5_actions:
            step_dict['top_5_action_distr'][act] = {}
            arg_mask = agent.AC.spatial_arg_mask[act, :].astype(bool)
            arg_names = np.array(agent.AC.spatial_arg_names)[arg_mask]
            distr = log_probs[arg_mask].reshape((-1, ) + agent.AC.screen_res)
            for i, name in enumerate(arg_names):
                step_dict['top_5_action_distr'][act][name + '_distr'] = distr[i]

    ### End inspection ###

    args, args_log_prob = agent.AC.sample_params(nonspatial_features,
                                                 spatial_features, a)
    step_dict['args'] = args

    log_prob = log_prob + args_log_prob

    action_id = np.array([agent.AC.action_table[act] for act in a])
    action = [
        actions.FunctionCall(action_id[i], args[i])
        for i in range(len(action_id))
    ]

    inspector.store_step(step_dict)
    return action, log_prob, torch.mean(entropy)
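
The top-5 extraction above (np.argsort followed by a reverse) can also be written with torch.topk before moving to NumPy; a brief equivalent sketch (new code, toy values):

import torch

probs = torch.softmax(torch.randn(1, 10), dim=-1)
top_p, top_idx = torch.topk(probs, k=5, dim=-1)   # values/indices already in descending order
# top_idx[0] plays the same role as top_5_actions in the function above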
Example #8
    def evaluate(self, true_labels, all_preds, entropies, **kwargs):
        ood_entropies = np.zeros(0)
        accuracies = []

        with torch.no_grad():
            for batch_num, batch in enumerate(self.ds_loader):
                x, y = batch
                x = x.to(self.device)

                if not self.ensemble:
                    out = self.model(x)
                else:
                    out = 0
                    for model in self.ensemble:
                        out += model(x)
                    out /= len(self.ensemble)
                probs = F.softmax(out, dim=-1)
                preds, _ = torch.max(probs, dim=-1)

                # entropy
                entropy = Categorical(probs).entropy().squeeze()
                entropies = np.concatenate(
                    (entropies, entropy.detach().cpu().numpy()))
                ood_entropies = np.concatenate(
                    (ood_entropies, entropy.cpu().numpy()))

                # accuracy
                predictions = out.argmax(dim=-1, keepdim=True).view_as(y).cpu()
                correct = y.eq(predictions).sum().item()
                acc = correct / out.shape[0]

                accuracies.append(acc)

                true_labels = np.concatenate((true_labels, np.zeros(len(x))))
                all_preds = np.concatenate((all_preds, preds.cpu().reshape(
                    (-1))))

        auroc = calculate_auroc(true_labels, all_preds)
        aupr = calculate_aupr(true_labels, all_preds)

        auroc_entropy = calculate_auroc(1 - true_labels, entropies)
        aupr_entropy = calculate_aupr(1 - true_labels, entropies)

        auroc_name = f'auroc_{self.ds_dataset}'
        aupr_name = f'aupr_{self.ds_dataset}'
        auroc_ent_name = f'auroc_entropy_{self.ds_dataset}'
        aupr_ent_name = f'aupr_entropy_{self.ds_dataset}'
        entropy_name = f'entropy_{self.ds_dataset}'
        acc_name = f"acc_{self.ds_dataset}"

        return {
            acc_name: np.mean(accuracies),
            auroc_name: auroc,
            aupr_name: aupr,
            entropy_name: np.mean(ood_entropies),
            auroc_ent_name: auroc_entropy,
            aupr_ent_name: aupr_entropy
        }
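
Note that the ensemble branch above averages logits before the softmax; deep ensembles more commonly average the members' softmax outputs instead. A hedged sketch of that alternative (new code with an illustrative helper name, not this repository's behaviour):

import torch
import torch.nn.functional as F

def ensemble_probs(models, x):
    # average the per-member predictive distributions rather than their logits
    with torch.no_grad():
        member_probs = torch.stack([F.softmax(m(x), dim=-1) for m in models])
    return member_probs.mean(dim=0)    # (batch, n_classes)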
Example #9
def inspection_step(agent, state, action_mask):
    state = torch.from_numpy(state).float().to(agent.device)
    action_mask = torch.tensor(action_mask).to(agent.device)
    log_probs, spatial_features, nonspatial_features = agent.AC.pi(
        state, action_mask)
    probs = torch.exp(log_probs)
    entropy = agent.compute_entropy(probs)
    a = Categorical(probs).sample()
    a = a.detach().cpu().numpy()
    ### Inspection ###
    step_dict = {}
    p = probs.detach().cpu().numpy()
    step_dict['action_distr'] = p
    step_dict['action_sel'] = a
    # All this sampling is completely wrong
    with torch.no_grad():
        # select_add
        sel_arg, sel_log_prob, sel_distr = agent.AC.sample_param(
            nonspatial_features, 'select_add')
        p = sel_distr.detach().cpu().numpy()
        step_dict['selectall_distr'] = p
        #step_dict['selectall_sel'] = sel_arg
        # queued
        q_arg, q_log_prob, q_distr = agent.AC.sample_param(
            nonspatial_features, 'queued')
        p = q_distr.detach().cpu().numpy()
        step_dict['queue_distr'] = p
        #step_dict['queue_sel'] = q_arg
        # screen
        screen_arg, screen_log_prob, screen_distr = agent.AC.sample_param(
            spatial_features, 'screen')
        p = screen_distr.detach().cpu().numpy().reshape(state.shape[-2:])
        step_dict['spatial_distr'] = p
        #step_dict['spatial_sel'] = screen_arg
    ### End inspection ###
    log_prob = log_probs[range(len(a)), a]
    action_id = np.array([agent.AC.action_dict[act] for act in a])
    args, args_log_prob, args_entropy = agent.get_arguments(
        spatial_features, nonspatial_features, a)

    if move_only:  # NOTE: move_only is assumed to be defined in the enclosing/module scope; it is not part of this snippet
        if a[0] != 2:
            step_dict['spatial_sel'] = [0, 0]
        else:
            step_dict['spatial_sel'] = args[0][1]
    log_prob = log_prob + args_log_prob
    entropy = entropy + args_entropy
    action = [
        actions.FunctionCall(action_id[i], args[i])
        for i in range(len(action_id))
    ]

    return action, log_prob, entropy, step_dict
Example #10
def inspection_step(agent, inspector, state, action_mask):
    state = torch.from_numpy(state).float().to(agent.device)
    action_mask = torch.tensor(action_mask).to(agent.device)
    log_probs, spatial_features, nonspatial_features = agent.AC.pi(
        state, action_mask)
    probs = torch.exp(log_probs)
    entropy = agent.compute_entropy(probs)
    a = Categorical(probs).sample()
    a = a.detach().cpu().numpy()
    #embedded_a = agent._embed_action(a)
    ### Inspection ###
    step_dict = {}
    p = probs.detach().cpu().numpy()
    step_dict['action_distr'] = p
    step_dict['action_sel'] = a

    # Concatenate embedded action to spatial and nonspatial features
    #spatial_features = agent._cat_action_to_spatial(embedded_a, spatial_features)
    #nonspatial_features = agent._cat_action_to_nonspatial(embedded_a, nonspatial_features)

    # All this sampling is completely wrong - but distributions are ok
    with torch.no_grad():
        for i, name in enumerate(inspector.arg_names):
            if inspector.spatial[i]:
                insp_arg, insp_log_prob, insp_distr = agent.AC.sample_param(
                    spatial_features, name)
                p = insp_distr.detach().cpu().numpy().reshape(state.shape[-2:])
                step_dict[name + '_distr'] = p
            else:
                insp_arg, insp_log_prob, insp_distr = agent.AC.sample_param(
                    nonspatial_features, name)
                p = insp_distr.detach().cpu().numpy()
                step_dict[name + '_distr'] = p
    ### End inspection ###
    log_prob = log_probs[range(len(a)), a]

    action_id = np.array([agent.AC.action_dict[act] for act in a])
    args, args_log_prob, args_entropy = agent.get_arguments(
        spatial_features, nonspatial_features, a)
    step_dict['args'] = args

    log_prob = log_prob + args_log_prob
    entropy = entropy + args_entropy
    action = [
        actions.FunctionCall(action_id[i], args[i])
        for i in range(len(action_id))
    ]

    inspector.store_step(step_dict)
    return action, log_prob, entropy
Example #11
    def evaluate(self, true_labels, all_preds, entropies, **kwargs):
        ood_entropies = np.zeros(0)

        with torch.no_grad():
            for batch_num, batch in enumerate(self.ood_loader):
                x, y = batch
                x = x.float().to(self.device)

                if not self.ensemble:
                    out = self.model(x)
                else:
                    out = 0
                    for model in self.ensemble:
                        out += model(x)
                    out /= len(self.ensemble)
                probs = F.softmax(out, dim=-1)
                preds, _ = torch.max(probs, dim=-1)

                entropy = Categorical(probs).entropy().squeeze()
                entropies = np.concatenate(
                    (entropies, entropy.detach().cpu().numpy()))
                ood_entropies = np.concatenate(
                    (ood_entropies, entropy.cpu().numpy()))

                true_labels = np.concatenate((true_labels, np.zeros(len(x))))
                all_preds = np.concatenate((all_preds, preds.cpu().reshape(
                    (-1))))

        auroc = calculate_auroc(true_labels, all_preds)
        aupr = calculate_aupr(true_labels, all_preds)

        auroc_entropy = calculate_auroc(1 - true_labels, entropies)
        aupr_entropy = calculate_aupr(1 - true_labels, entropies)

        auroc_name = f'auroc_{self.ood_dataset}'
        aupr_name = f'aupr_{self.ood_dataset}'
        auroc_ent_name = f'auroc_entropy_{self.ood_dataset}'
        aupr_ent_name = f'aupr_entropy_{self.ood_dataset}'
        entropy_name = f'entropy_{self.ood_dataset}'

        return {
            auroc_name: auroc,
            aupr_name: aupr,
            entropy_name: np.mean(ood_entropies),
            auroc_ent_name: auroc_entropy,
            aupr_ent_name: aupr_entropy
        }
Example #12
    def actor_step(self, env_output, hidden_state=None, cell_state=None):
        screen_layers = env_output['screen_layers'].to(self.device)
        minimap_layers = env_output['minimap_layers'].to(self.device)
        done = env_output['done'].to(self.device).view(1,1)
        player_state = env_output['player_state'].unsqueeze(0).to(self.device)
        last_action = env_output['last_action'].to(self.device) # add it to the output of the environment
        action_mask = env_output['action_mask'].to(self.device)

        # add time and batch dimension 
        screen_layers = screen_layers.view(1,1,*screen_layers.shape[-3:])
        minimap_layers = minimap_layers.view(1,1,*minimap_layers.shape[-3:])
        
        results = self.compute_features(screen_layers, 
                                        minimap_layers, 
                                        player_state, 
                                        last_action, 
                                        hidden_state, 
                                        cell_state,
                                        done
                                       )
        spatial_features, shared_features, hidden_state, cell_state = results
        
        log_probs = self.pi(shared_features, action_mask)
        probs = torch.exp(log_probs)
        main_action_torch = Categorical(probs).sample() # check probs < 0?!
        main_action = main_action_torch.detach().cpu().numpy()
        log_prob = log_probs[range(len(main_action)), main_action]
        
        args, args_log_prob, args_indexes = self.sample_params(shared_features, spatial_features, main_action)
        assert args_log_prob.shape == log_prob.shape, ("Shape mismatch between arg_log_prob and log_prob ",\
                                                      args_log_prob.shape, log_prob.shape)
        log_prob = log_prob + args_log_prob
        
        action_id = np.array([self.action_table[act] for act in main_action])
        sc2_env_action = [sc_actions.FunctionCall(action_id[i], args[i]) for i in range(len(action_id))]
        
        actor_output = {'log_prob':log_prob.flatten(),
                        'main_action':main_action_torch.flatten(),
                        'sc_env_action':sc2_env_action,
                        **args_indexes} # args_indexes = {'categorical_args_indexes', 'spatial_args_indexes'}
        
        return actor_output, (hidden_state, cell_state)
Example #13
def selfplay_batch(objs, l_opt, listener, s_opt, speaker, ema_reward=None):
    """ Use exponential reward (kinda depricated not working)
    :return updated average reward
    """
    # Generate batch
    # Encode each object (six attributes) as a single integer index for the running-average table
    idxes = (objs[:, 5] * 100000 + objs[:, 4] * 10000 + objs[:, 3] * 1000 +
             objs[:, 2] * 100 + objs[:, 1] * 10 + objs[:, 0])
    s_logits = speaker(objs)
    msgs = Categorical(logits=s_logits).sample()
    oh_msgs = listener.one_hot(msgs)
    l_logits = listener(oh_msgs)

    # Train listener
    l_logprobs = Categorical(logits=l_logits).log_prob(objs)
    l_logprobs = l_logprobs.sum(-1)
    l_opt.zero_grad()
    (-l_logprobs.mean()).backward(retain_graph=True)
    l_opt.step()
    # Policy gradient
    rewards = l_logprobs.detach()
    values = rewards.numpy()
    # Compute reward average
    if ema_reward is not None:
        ema_reward.update(values, idxes)
    else:
        ema_reward = ExponentialMovingAverager()
        ema_reward.update(values, idxes)
    s_dist = Categorical(logits=s_logits)
    s_logprobs = s_dist.log_prob(msgs).sum(-1)
    reinforce = (rewards - torch.tensor(ema_reward.mean[idxes])) * s_logprobs
    entropy = s_dist.entropy().sum(-1)
    s_opt.zero_grad()
    (-reinforce.mean() - 0.0001 * entropy.mean()).backward()
    s_opt.step()
    return ema_reward
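
The routine above is essentially REINFORCE with a moving-average reward baseline; a compact, self-contained sketch of that update rule (new code, reinforce_step and its arguments are illustrative names, not the project's API):

import torch
from torch.distributions import Categorical

def reinforce_step(s_logits, msgs, rewards, baseline, optimizer, ent_coef=1e-4, beta=0.9):
    dist = Categorical(logits=s_logits)
    log_probs = dist.log_prob(msgs).sum(-1)             # sum over message symbols
    advantage = rewards - baseline                       # centre the reward
    loss = -(advantage * log_probs).mean() - ent_coef * dist.entropy().sum(-1).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return beta * baseline + (1 - beta) * rewards.mean().item()   # updated scalar baseline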
Example #14
def evaluate(args, model, tokenizer, prefix="", test=False):
    eval_task_names = (args.task_name, )
    eval_outputs_dirs = (args.output_dir, )
    m = torch.nn.Softmax(dim=1)
    with open(args.data_dir, "r", encoding='utf8') as f:
        inputjson = [json.loads(jline) for jline in f.readlines()]
    results = {}
    ABCD = ["A", "B", "C", "D"]
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset = load_and_cache_examples(args,
                                               eval_task,
                                               tokenizer,
                                               evaluate=not test,
                                               test=test)

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            print("===evaluate===", eval_output_dir)
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        # multi-gpu evaluate
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        entropys = None
        out_label_ids = None
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    # XLM doesn't use segment_ids
                    "token_type_ids": batch[2] if args.model_type in ["bert", "xlnet"] else None,
                    "labels": batch[3],
                }
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                logits = m(logits)
                entropy = Categorical(probs=logits).entropy()
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                entropys = entropy.detach().cpu().numpy()
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                entropys = np.append(entropys,
                                     entropy.detach().cpu().numpy(),
                                     axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs["labels"].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        max_pred = np.argmax(preds, axis=1)
        error = {"errors": []}
        entropy_result = defaultdict(list)
        entropy_list = []
        for i in range(len(max_pred)):
            entropy_result[inputjson[i]['id']].append(
                [entropys[i], inputjson[i]])
        for id, eitem in entropy_result.items():
            max_e_item = sorted(eitem, key=lambda l: l[0], reverse=True)[0]
            entropy_list.append(max_e_item[1])

        for i, (p, t) in enumerate(zip(max_pred, out_label_ids)):
            error['errors'].append([
                entropys[i], "predict:" + ABCD[p], "answer:" + ABCD[t],
                inputjson[i]
            ])
        error['errors'] = sorted(error['errors'],
                                 key=lambda l: l[0],
                                 reverse=True)
        results.update(error)
        preds = np.argmax(preds, axis=1)
        acc = simple_accuracy(preds, out_label_ids)
        result = {"eval_acc": acc, "eval_loss": eval_loss}
        results.update(result)

        output_eval_file = os.path.join(
            eval_output_dir,
            "datadir_" + args.data_dir.replace("/", "").replace(".", "") +
            "_eval_results.txt")
        output_entropy_file = os.path.join(
            eval_output_dir, "datadir_" +
            args.data_dir.replace("/", "").replace(".", "") + "_entropy.jsonl")
        with jsonlines.open(output_entropy_file, mode='w') as writer:
            writer.write_all(entropy_list)

        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(
                str(prefix) + " is test:" + str(test)))
            writer.write("model           =%s\n" %
                         str(args.model_name_or_path))
            writer.write("total batch size=%d\n" %
                         (args.per_gpu_train_batch_size *
                          args.gradient_accumulation_steps *
                          (torch.distributed.get_world_size()
                           if args.local_rank != -1 else 1)))
            writer.write("train num epochs=%d\n" % args.num_train_epochs)
            writer.write("fp16            =%s\n" % args.fp16)
            writer.write("max seq length  =%d\n" % args.max_seq_length)
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
            for e in error['errors']:
                writer.write(str(e) + "\n")
    return results
Example #15
    def _batch_iteration(self,
                         x: torch.Tensor,
                         y: torch.Tensor,
                         train: bool = True,
                         targeted=False,
                         exclude_wrong_predictions=False,
                         return_acc: bool = True,
                         **kwargs):
        """ one iteration of forward-backward """

        # attack stuff
        if train:
            adv_batch_size = int(self._arguments['batch_size'] / 2)
            x_to_adv = kwargs['xun'][:adv_batch_size]
            y_to_adv = y[:adv_batch_size]
            self._model.eval()
            if self._model.is_maskable:
                self._model.apply_weight_mask()
            adv_results, _ = construct_adversarial_examples(
                x_to_adv, y_to_adv, self.attack_method, self._model,
                self._device, self.epsilon, exclude_wrong_predictions,
                targeted)
            _, advs, _ = adv_results

            advs = advs.cpu()
            new_advs = []
            for image in advs:
                image = self.transform(image.squeeze()).unsqueeze(0)
                new_advs.append(image)
            advs = torch.cat(new_advs)

            x = torch.cat((advs, x[adv_batch_size:]))

            self._model.train()

        # unpack
        x, y = x.to(self._device).float(), y.to(self._device)

        # update metrics
        self._metrics.update_batch(train)

        # record time
        if "cuda" in str(self._device):
            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)
            start.record()

        # forward pass
        if return_acc:
            accuracy, loss, out = self._forward_pass(x,
                                                     y,
                                                     train=train,
                                                     return_acc=return_acc)
        else:
            loss, out = self._forward_pass(x,
                                           y,
                                           train=train,
                                           return_acc=return_acc)

        # backward pass
        if train:
            self._backward_pass(loss)

        # compute entropy
        probs = F.softmax(out, dim=-1)
        entropy = Categorical(probs).entropy().squeeze().mean()

        # get max predicted prob
        preds, _ = torch.max(probs, dim=-1)

        # record time
        if "cuda" in str(self._device):
            end.record()
            torch.cuda.synchronize(self._device)
            time = start.elapsed_time(end)
        else:
            time = 0

        # free memory
        for tens in [out, y, x, loss, entropy, preds]:
            tens.detach()

        if return_acc:
            return accuracy, loss.item(), time, entropy.detach().cpu(
            ), preds.cpu()
        else:
            return loss.item(), time, entropy.detach().cpu(), preds.cpu()
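
construct_adversarial_examples is not shown in this snippet; as a rough illustration of what such a helper can produce, here is a minimal FGSM-style attack (new code, an assumption about the attack family rather than the project's implementation, and it assumes inputs in [0, 1]):

import torch
import torch.nn.functional as F

def fgsm(model, x, y, epsilon):
    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), y)
    loss.backward()
    # step in the direction that increases the loss, then clamp to the valid input range
    return (x + epsilon * x.grad.sign()).clamp(0, 1).detach()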
Example #16
def inspection_step(agent, inspector, state, action_mask):
    spatial_state = state['spatial']
    player_state = state['player']
    spatial_state = torch.from_numpy(spatial_state).float().to(agent.device)
    player_state = torch.from_numpy(player_state).float().to(agent.device)
    action_mask = torch.tensor(action_mask).to(agent.device)

    log_probs, spatial_features, nonspatial_features = agent.AC.pi(
        spatial_state, player_state, action_mask)
    entropy = agent.compute_entropy(log_probs)
    probs = torch.exp(log_probs)
    a = Categorical(probs).sample()
    a = a.detach().cpu().numpy()
    log_prob = log_probs[range(len(a)), a]

    ### Inspection ###
    step_dict = {}
    p = probs.detach().cpu().numpy()
    step_dict['action_distr'] = p
    step_dict['action_sel'] = a

    # Choose top 5 actions from the probabilities - check about the batch dim
    top_5 = np.argsort(p)[:, -5:]
    top_5_actions = np.array(top_5[:, ::-1])[
        0]  # some issues in accessing p if I don't call np.array()
    #print("top_5_actions: ", top_5_actions, top_5_actions.shape)
    step_dict['top_5_actions'] = top_5_actions

    # Save distributions only of the top 5 actions
    step_dict['top_5_action_distr'] = {}
    with torch.no_grad():
        for act in top_5_actions:
            step_dict['top_5_action_distr'][act] = {}  # first nested level
            arg_names = inspector.act_to_arg_names[act]
            for arg_name in arg_names:
                if inspector.arguments_type[arg_name] == 'spatial':  # either 'spatial' or 'categorical'
                    insp_arg, insp_log_prob, insp_distr = agent.AC.sample_param(
                        spatial_features, arg_name)
                    p = insp_distr.detach().cpu().numpy().reshape(
                        spatial_state.shape[-2:])
                else:
                    insp_arg, insp_log_prob, insp_distr = agent.AC.sample_param(
                        nonspatial_features, arg_name)
                    p = insp_distr.detach().cpu().numpy()

                step_dict['top_5_action_distr'][act][arg_name + '_distr'] = p  # second nested level

    ### End inspection ###

    args, args_log_prob, args_entropy = agent.get_arguments(
        spatial_features, nonspatial_features, a)
    step_dict['args'] = args

    log_prob = log_prob + args_log_prob

    action = [actions.FunctionCall(a[i], args[i]) for i in range(len(a))]

    inspector.store_step(step_dict)
    return action, log_prob, torch.mean(entropy)
Example #17
    def _batch_iteration_ood(self,
                             x: torch.Tensor,
                             y: torch.Tensor,
                             ood_x: torch.Tensor,
                             ood_y: torch.Tensor,
                             train: bool = True,
                             return_acc: bool = True):
        """ one iteration of forward-backward """

        # unpack
        x, y = x.to(self._device).float(), y.to(self._device)
        ood_x, ood_y = ood_x.to(self._device).float(), ood_y.to(self._device)

        # update metrics
        self._metrics.update_batch(train)

        # record time
        if "cuda" in str(self._device):
            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)
            start.record()

        # forward pass
        if return_acc:
            accuracy, loss, out = self._forward_pass_ood(x,
                                                         y,
                                                         ood_x,
                                                         ood_y,
                                                         train=train,
                                                         return_acc=return_acc)
        else:
            loss, out = self._forward_pass_ood(x,
                                               y,
                                               ood_x,
                                               ood_y,
                                               train=train,
                                               return_acc=return_acc)

        if self._arguments['prune_criterion'] == 'RigL':
            self._handle_pruning(self._metrics._epoch)

        # backward pass
        if train:
            self._backward_pass(loss)

        # compute entropy
        probs = F.softmax(out, dim=-1)
        entropy = Categorical(probs).entropy().squeeze().mean()

        # get max predicted prob
        preds, _ = torch.max(probs, dim=-1)

        # record time
        if "cuda" in str(self._device):
            end.record()
            torch.cuda.synchronize(self._device)
            time = start.elapsed_time(end)
        else:
            time = 0

        # free memory
        for tens in [out, y, x, loss, entropy, preds]:
            tens.detach()

        if return_acc:
            return accuracy, loss.item(), time, entropy.detach().cpu(
            ), preds.cpu()
        else:
            return loss.item(), time, entropy.detach().cpu(), preds.cpu()
Example #18
def train(args, nets, optimizers, env, obs_size, n_drones):

    icm_model_name = "ICM_" if args.enable_icm else ""

    log_file = f"A2C_{icm_model_name}{args.policy}.log"
    logging.basicConfig(filename=log_file, level=logging.INFO, format="%(message)s")

    steps = []
    total_steps = 0
    ep_rewards = 0.0
    grad_step = 0

    if args.enable_icm:
        icm = ICM(obs_size=obs_size, action_space=env.action_size)

    pbar = tqdm(total=args.total_steps)
    while total_steps < args.total_steps:

        obs = env.reset()
        drone_pos = np.array(env.n_drones_pos)

        obs, drone_pos = prepare_inputs(args, obs, drone_pos, n_drones, obs_size)
        curr_state = obs
        avg_rewards = []

        for _ in range(args.rollout_steps):

            # network forward pass
            policies = []
            values = []
            actions = []
            for i in range(n_drones):
                p, v = nets[i](obs, drone_pos)
                probs = F.softmax(p, dim=-1)
                a = Categorical(probs).sample()[0]

                policies.append(p)
                values.append(v)
                actions.append(a.detach().unsqueeze(0).numpy())

            # gather env data, reset done envs and update their obs
            obs, rewards, dones = env.step(actions)
            ep_rewards += rewards
            if dones:
                ep_rewards = 0.0
                obs = env.reset()
            drone_pos = np.array(env.n_drones_pos)
            obs, drone_pos = prepare_inputs(args, obs, drone_pos, n_drones, obs_size)
            next_state = obs
            avg_rewards.append(rewards)
            if args.enable_icm:
                ## ICM for one drone
                rewards += icm(
                    a_t=torch.tensor(actions[0], dtype=torch.long),
                    a_t_logits=policies[0].detach(),
                    s_t=curr_state,
                    s_t1=next_state,
                )

            # reset the LSTM state for done envs
            masks = (
                1.0 - torch.from_numpy(np.array([dones], dtype=np.float32))
            ).unsqueeze(1)

            total_steps += 1
            pbar.update(1)

            rewards = torch.tensor([rewards]).float().unsqueeze(1)

            actions = torch.tensor(actions)
            policies = torch.cat(policies)
            values = torch.cat(values)
            steps.append((rewards, masks, actions, policies, values))

        final_obs = obs
        final_drone_pos = drone_pos
        final_values = []
        for i in range(n_drones):
            _, final_v = nets[i](final_obs, final_drone_pos)
            final_values.append(final_v)

        final_values = torch.cat(final_values)
        steps.append((None, None, None, None, final_values))

        actions, policies, values, returns, advantages = process_rollout(args, steps)

        probs = F.softmax(policies, dim=-1)
        log_probs = F.log_softmax(policies, dim=-1)
        log_action_probs = log_probs.clone()

        policy_loss = (-log_action_probs * Variable(advantages)).mean()
        value_loss = advantages.pow(2).mean()
        entropy_loss = (-log_probs * probs).mean()

        loss = (
            policy_loss
            + value_loss * args.value_coeff
            + entropy_loss * args.entropy_coeff
        )

        loss.backward()

        if (grad_step + 1) % args.grad_acc == 0:
            for i in range(n_drones):
                torch.nn.utils.clip_grad_norm_(
                    nets[i].parameters(), args.grad_norm_limit
                )
                optimizers[i].step()
                optimizers[i].zero_grad()
        grad_step += 1

        steps = []

        if total_steps % args.save_freq == 0:
            for i in range(n_drones):
                torch.save(
                    nets[i].state_dict(),
                    f"A2C_models/{args.policy}_policy/A2C_drone_{icm_model_name}{i}.bin",
                )

        pbar.set_postfix(loss=loss.item(), reward=np.mean(avg_rewards))

        logging.info(f"loss: {loss.item()}, reward: {np.mean(avg_rewards)}")
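
process_rollout is not shown above; a typical implementation turns the stored (reward, mask, value) tuples into discounted returns and advantages, roughly as in this sketch (new code, illustrative name and assumed semantics):

import torch

def discounted_returns(rewards, masks, values, last_value, gamma=0.99):
    # rewards, masks, values: lists of per-step tensors; last_value bootstraps the tail
    returns, R = [], last_value
    for r, m in zip(reversed(rewards), reversed(masks)):
        R = r + gamma * R * m              # mask = 0 cuts the bootstrap at episode ends
        returns.insert(0, R)
    returns = torch.stack(returns)
    advantages = returns - torch.stack(values)
    return returns, advantages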
Example #19
    def _batch_iteration(self,
                         x: torch.Tensor,
                         y: torch.Tensor,
                         train: bool = True,
                         return_acc: bool = True,
                         **kwargs):
        """ one iteration of forward-backward """

        # unpack
        x, y = x.to(self._device).float(), y.to(self._device)

        # update metrics
        self._metrics.update_batch(train)

        # record time
        if "cuda" in str(self._device):
            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)
            start.record()

        # forward pass
        if return_acc:
            accuracy, loss, out = self._forward_pass(x,
                                                     y,
                                                     train=train,
                                                     return_acc=return_acc)
        else:
            loss, out = self._forward_pass(x,
                                           y,
                                           train=train,
                                           return_acc=return_acc)

        if self._arguments['prune_criterion'] == 'RigL':
            self._handle_pruning(self._metrics._epoch)

        # backward pass
        if train:
            self._backward_pass(loss)

        # compute entropy
        probs = F.softmax(out, dim=-1)
        entropy = Categorical(probs).entropy().squeeze().mean()

        # get max predicted prob
        preds, _ = torch.max(probs, dim=-1)

        # AUGERINO ''''''''''''''
        # loss = 0
        # accuracy = 0
        # out = 0
        # for i, (im, crit) in enumerate(zip(x, y)):
        #     batch = [im.unsqueeze(0)]
        #     batch_y = [torch.tensor(crit)]
        #     for _ in range(4):
        #         batch.append(self.augerino(im.unsqueeze(0)))
        #         batch_y.append(torch.tensor(crit))
        #     batch = torch.cat(batch)
        #     batch_y = torch.tensor(batch_y, device=self._device)
        #     if return_acc:
        #         _accuracy, _loss, _out = self._forward_pass(batch, batch_y, train=train, return_acc=return_acc)
        #         accuracy += (_accuracy)
        #     else:
        #         _loss, _out = self._forward_pass(batch, batch_y, train=train, return_acc=return_acc)
        #     loss += _loss
        #     out += _out
        # loss = loss / len(x)
        # accuracy = accuracy / len(x)
        # out = out / len(x)
        # '''''''''''''''''''''''

        # record time
        if "cuda" in str(self._device):
            end.record()
            torch.cuda.synchronize(self._device)
            time = start.elapsed_time(end)
        else:
            time = 0

        # free memory
        for tens in [out, y, x, loss, entropy, preds]:
            tens.detach()

        if return_acc:
            return accuracy, loss.item(), time, entropy.detach().cpu(
            ), preds.cpu()
        else:
            return loss.item(), time, entropy.detach().cpu(), preds.cpu()
Example #20
def entropy_categorical(categorical_parameters):
    entropy = Categorical(categorical_parameters).entropy()
    # TODO: discuss whether we want numpy in these functions
    assert_no_nan_no_inf(entropy)
    entropy = entropy.detach().numpy()
    return entropy
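
A quick sanity check of Categorical entropy for reference: a uniform distribution attains the maximum, log(n) nats, while a peaked distribution scores lower (new code, toy values):

import math
import torch
from torch.distributions import Categorical

uniform = Categorical(probs=torch.full((4,), 0.25))
peaked = Categorical(probs=torch.tensor([0.97, 0.01, 0.01, 0.01]))
assert torch.isclose(uniform.entropy(), torch.tensor(math.log(4)))
assert peaked.entropy() < uniform.entropy()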
Example #21
def get_llhs(self, obs, actions):
    with torch.no_grad():
        pi = self.p_net(obs)
        llhs = Categorical(pi).log_prob(actions)
    return llhs.detach()
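
Log-likelihoods collected under no_grad like this usually serve as the "old policy" term in an importance ratio; a short sketch of that downstream use (new code, PPO-style clipping chosen only as an example, clipped_ratio is an illustrative name):

import torch

def clipped_ratio(new_llhs, old_llhs, clip_eps=0.2):
    ratio = torch.exp(new_llhs - old_llhs)        # pi_new(a|s) / pi_old(a|s)
    return torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps)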