Example #1
def index(request):
    if not Authorize.authorize(request.user):
        return HttpResponseRedirect("amrs_user_validation/access_denied")
    device = u.get_device(request)

    if device["is_mobile"]:
        return render(request, "amrs_reports/index_mobile.html", {})
    else:
        return render(request, "amrs_reports/index.html", {})
Example #2
def adv_sample_papernot(model_name, dataset, target):
    '''
    dataset yields (image, original_label) pairs from a dataset class;
    target is the class to which the images should be misclassified.
    '''
    device = utilities.get_device(1)

    EPOCHS = 10
    LAMBDA = 20.0
    NUM_SAMPLES = 10
    EPSILON = 0.5

    samples = []

    L2_loss = nn.MSELoss().to(device)
    Classification_loss = nn.CrossEntropyLoss().to(device)

    model = torch.load("saved_models/" + model_name).to(device)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=1,
                                              shuffle=True)

    idx = 0
    samples = torch.zeros([NUM_SAMPLES] + list(dataset[0][0].shape))

    # init target
    target_ = target
    target = torch.zeros((1, model.n_classes))
    target[0][target_] = 1

    for data, label in data_loader:

        data = data.to(device)
        sample, target = Variable(
            data.data.to(device),
            requires_grad=True), Variable(target).to(device)

        sample = torch.clamp(sample, 0, 1)

        for epoch in range(EPOCHS):

            sample, target = Variable(
                sample.data,
                requires_grad=True).to(device), Variable(target).to(device)
            output = torch.sigmoid(model(sample))
            loss = L2_loss(output, target) + LAMBDA * L2_loss(sample, data)
            model.zero_grad()
            loss.backward()

            delta = EPSILON * torch.sign(sample.grad.data)

            sample = Variable(sample.data, requires_grad=True).to(device)
            output = model(sample)
            model.zero_grad()
            output[0][target_].backward(retain_graph=True)

            # sample.grad is reused below, so clone the target-class gradient
            # before accumulating gradients for the other classes
            jacobian_t = sample.grad.data.clone()
            jacobian_non_t = torch.zeros_like(jacobian_t)
            for i in range(model.n_classes):
                if i == target_:
                    continue
                model.zero_grad()
                sample.grad.data.zero_()  # zero_grad() does not clear input gradients
                output[0][i].backward(retain_graph=True)
                jacobian_non_t += sample.grad.data

            saliency = np.zeros(sample.reshape(-1).shape)
            for i in range(sample.shape[2]):
                for j in range(sample.shape[3]):
                    if jacobian_t[0][0][i][j] < 0 or jacobian_non_t[0][0][i][
                            j] > 0:
                        continue
                    saliency[i * sample.shape[3] +
                             j] = jacobian_t[0][0][i][j] * abs(
                                 jacobian_non_t[0][0][i][j])

            indices = np.argsort(saliency)

            with torch.no_grad():
                for i in range(len(indices) - 1, -1, -1):
                    sample, target = Variable(
                        sample.data).to(device), Variable(target).to(device)
                    output = model(sample)
                    pred = output.data.max(1, keepdim=True)[1]
                    if pred[0][0].item() == target_:  # sample is now classified as the target class
                        print("Done")
                        break
                    sample[0][0][indices[i] // sample.shape[3]][
                        indices[i] % sample.shape[3]] -= delta[0][0][
                            indices[i] // sample.shape[3]][indices[i] %
                                                           sample.shape[3]]
                    sample = (sample - torch.min(sample)) / (
                        torch.max(sample) - torch.min(sample))

        samples[idx] = sample[0]
        idx += 1

        if idx == NUM_SAMPLES:
            break

    return samples
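A hypothetical usage sketch for the function above; the model filename and the dataset wrapper are placeholders, not part of the original example:

# Hypothetical usage: "mnist_cnn" and MNISTWrapper are placeholders.
# Any torch Dataset yielding (image, label) pairs should work.
test_set = MNISTWrapper(train=False)
adv_images = adv_sample_papernot("mnist_cnn", test_set, target=3)
print(adv_images.shape)  # (NUM_SAMPLES, C, H, W)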
Example #3
def adv_sample(model_name, dataset, target, num_samples):
    '''
    dataset yields (image, original_label, name) triples from a dataset class;
    target is the class to which the images should be misclassified.
    '''
    device = utilities.get_device(1)

    EPOCHS = 5000
    LAMBDA = 10.0
    EPSILON = 0.25
    NUM_SAMPLES = num_samples

    L2_loss = nn.MSELoss().to(device)
    Classification_loss = nn.CrossEntropyLoss().to(device)

    model = None
    if device == utilities.get_device(0):
        model = torch.load("saved_models/" + model_name,
                           map_location='cpu').to(device)
    else:
        model = torch.load("saved_models/" + model_name).to(device)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=1,
                                              shuffle=True)

    idx = 0
    samples = torch.zeros([NUM_SAMPLES] + list(dataset[0][0].shape))

    # init target
    target_ = target
    target = torch.zeros((1, model.n_classes))
    target[0][target_] = 1
    names = []
    for data, label, name in data_loader:

        data = data.to(device)
        sample, target = Variable(
            data.to(device), requires_grad=True), Variable(target).to(device)

        sample = (sample - torch.min(sample)) / (torch.max(sample) -
                                                 torch.min(sample))
        # sample = torch.clamp(sample,0,1)

        for epoch in range(EPOCHS):

            sample, target = Variable(
                sample.data,
                requires_grad=True).to(device), Variable(target).to(device)
            output = torch.sigmoid(model(sample))
            loss = L2_loss(output, target) + LAMBDA * L2_loss(sample, data)
            model.zero_grad()
            loss.backward()

            sample = sample - EPSILON * sample.grad.data

            sample = (sample - torch.min(sample)) / (torch.max(sample) -
                                                     torch.min(sample))
            # sample = torch.clamp(sample,0,1)

        names.append(name[0])
        samples[idx] = sample[0]
        output = model(sample)
        pred = output.data.max(1, keepdim=True)[1]
        print(pred[0][0].item(),
              label.item())  # predicted by our model, predicted by oracle
        idx += 1

        if idx == NUM_SAMPLES:
            break

    return samples, names
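A hypothetical follow-up sketch for persisting the returned tensors; the call arguments and the adv_samples/ output directory are assumptions, not part of the original:

# Hypothetical usage: "crop_cnn", crop_test_set and adv_samples/ are assumptions.
samples, names = adv_sample("crop_cnn", crop_test_set, target=0, num_samples=10)
for sample, name in zip(samples, names):
    torch.save(sample, "adv_samples/{}.pt".format(name))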
Example #4
def actor_critic(agent_name,
                 multiple_agents=False,
                 n_episodes=300,
                 max_t=1000):
    """ Batch processed the states in a single forward pass with a single neural network
    Params
    ======
        multiple_agents (boolean): boolean for multiple agents
        PER (boolean): 
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
    """
    env, env_info, states, state_size, action_size, brain_name, num_agents = initialize_env(
        multiple_agents)

    device = get_device()
    scores_window = deque(maxlen=100)
    scores = np.zeros(num_agents)
    scores_episode = []

    max_trajectory_size = max_t // 10
    min_trajectory_size = 1
    agent = Actor_Crtic_Agent(brain_name, agent_name, device, state_size,
                              action_size, num_agents, BUFFER_SIZE, BATCH_SIZE,
                              RANDOM_SEED, max_trajectory_size,
                              min_trajectory_size)

    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations

        agent.reset()
        scores = np.zeros(num_agents)

        for t in range(max_t):
            actions, values, log_probs = agent.act(states)
            env_info = env.step(actions)[
                brain_name]  # send the action to the environment
            next_states = env_info.vector_observations  # get the next state
            rewards = env_info.rewards  # get the reward
            dones = env_info.local_done

            if multiple_agents:
                agent.step(states, actions, rewards, next_states, dones,
                           values, log_probs)
            else:
                agent.step(states, np.expand_dims(actions, axis=0), rewards,
                           next_states, dones, values, log_probs)

            states = next_states
            scores += rewards
            if t % 20 == 0:  # periodic progress update
                print('\rTimestep {}\tScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'.
                      format(t, np.mean(scores), np.min(scores),
                             np.max(scores)),
                      end="")
            if np.any(dones):
                break

        score = np.mean(scores)
        scores_window.append(score)  # save most recent score
        scores_episode.append(score)

        print(
            '\rEpisode {}\tScore: {:.2f}\tAverage Score: {:.2f}\tMax Score: {:.2f}'
            .format(i_episode, score, np.mean(scores_window), np.max(scores)),
            end="\n")
        update_csv(agent_name, i_episode, np.mean(scores_window),
                   np.max(scores))
        agent.save_agent(agent_name)

        # Early stop
        if i_episode == 100:
            return scores_episode

        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
        if np.mean(scores_window) >= 30.0:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                .format(i_episode - 100, np.mean(scores_window)))
            agent.save_agent(agent_name + "Complete")
            break

    return scores_episode
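A hypothetical driver sketch for the function above; the agent name is a placeholder:

# Hypothetical usage: "a2c_reacher" is a placeholder agent name.
import matplotlib.pyplot as plt

scores = actor_critic("a2c_reacher", multiple_agents=True, n_episodes=300)

plt.plot(scores)  # per-episode mean score across agents
plt.xlabel("Episode")
plt.ylabel("Mean score")
plt.show()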
Example #5
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import torch.nn.functional as F
import torch.nn as nn

from noise import OUNoise, GaussianExploration
from agent import D4PGAgent
from train import train
from utilities import Seeds, initialize_env, get_device
from memory import NStepReplayBuffer

MULTI = True
device = get_device()  # gets gpu if available

environment_params = {
    'multiple_agents': MULTI,  # runs 20 or 1 arm environment
    'no_graphics': False,  # set True to run the windowless (no graphics) build
    'train_mode': True,  # runs in train mode
    'offline': True,  # toggle on for udacity jupyter notebook
    'agent_count': 20 if MULTI else 1,
    'device': device
}

env, env_info, states, state_size, action_size, brain_name, num_agents = initialize_env(
    environment_params)

seedGenerator = Seeds('seeds')
seedGenerator.next()
Example #6
def main():
    # Loading all data sets
    training_data, validation_data, test_data = get_data_sets()
    device = get_device()
    accuracy_threshold = 0.86

    # model_params
    layers = 1
    vocab_size = get_vocab_size()
    embedding_dim = 400
    hidden_size = 256
    lstm_dropout = 0
    layer_dropout = 0

    # training_params
    loss_function = "BCELoss"
    optimizer = "Adam"
    batch_size = 512
    epochs = 4
    weight_decay = 0
    learning_rate = 1e-3

    model = RNNmodel(vocab_size, embedding_dim, hidden_size, lstm_dropout,
                     layer_dropout, layers)
    # To load a pre-trained model
    # model.load_state_dict(torch.load("./state"))

    train(training_data, model, loss_function, learning_rate, epochs, device,
          batch_size, optimizer, weight_decay)
    # TODO: Add more evaluation metrics (precision,recall)

    training_accuracy = evaluate(training_data, model, device)
    # TODO: Use an F1 score between precision,recall of both training and validation sets
    if training_accuracy > accuracy_threshold:
        torch.save(model.state_dict(), "./state")

    validation_accuracy = evaluate(validation_data, model, device)

    print("TRAINING ACCURACY", training_accuracy)
    print("VALIDATION ACCURACY", validation_accuracy)

    #test_accuracy = evaluate(test_data, model, device)

    params_dict = {
        "model_params": {
            "layers": layers,
            "vocab_size": vocab_size,
            "embedding_dim": embedding_dim,
            "hidden_size": hidden_size,
            "lstm_dropout": lstm_dropout,
            "layer_dropout": layer_dropout
        },
        "training_params": {
            "loss_function": loss_function,
            "optimizer": optimizer,
            "batch_size": batch_size,
            "learning_rate": learning_rate,
            "epochs": epochs,
            "weight_decay": weight_decay
        }
    }

    log_trial(params_dict, training_accuracy, validation_accuracy)
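If the module is meant to be run directly, the usual entry-point guard would look like this (an assumption; it is not shown in the original snippet):

if __name__ == "__main__":
    main()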
Example #7
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
from utilities import get_device

device = get_device()

WEIGHT_LOW = -3e-2
WEIGHT_HIGH = 3e-2


def initialize_weights(model, low, high):
    for param in model.parameters():
        param.data.uniform_(low, high)


class Actor(nn.Module):
    """Actor (Policy) Model."""
    def __init__(self, params):
        """Initialize parameters and build model.
        Params
        ======
            params (dict-like): dictionary of parameters
        """
        super(Actor, self).__init__()
        self.state_size = params['state_size']
        self.action_size = params['action_size']
Example #8
parser.add_argument("--yolotest",  # flag name reconstructed from dest; the snippet is truncated before this call
                    dest='yolotest',
                    default=False,
                    action='store_true')
parser.add_argument("--stitch",
                    help="Stitch adv images into original",
                    dest='stitch',
                    default=False,
                    action='store_true')

_a = parser.parse_args()
args = {}

for a in vars(_a):
    args[a] = getattr(_a, a)

device = utilities.get_device(1)

dataset = get_Crop_Dataset()
utilities.print_dataset_details(dataset)

if args['bb']:
    input_shape = list(dataset["train"][0][0].shape)

    conv = [input_shape[0]]
    fc = []
    n_classes = 10

    epochs = 20  # int(input("Epochs: "))
    batch_size = 1000  # int(input("batch_size: "))
    lr = 0.01  # float(input("lr: "))
Example #9
def ddpg(multiple_agents=False, PER=False, n_episodes=300, max_t=1000):
    """ Deep Deterministic Policy Gradients
    Params
    ======
        multiple_agents (boolean): whether the environment runs multiple agents
        PER (boolean): use prioritized experience replay
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
    """
    env, env_info, states, state_size, action_size, brain_name, num_agents = initialize_env(
        multiple_agents)

    device = get_device()
    scores_window = deque(maxlen=100)
    scores = np.zeros(num_agents)
    scores_episode = []

    agents = []
    # shared_memory = NaivePrioritizedBuffer(device, BUFFER_SIZE, BATCH_SIZE)
    shared_memory = PrioritizedBuffer(device, BUFFER_SIZE, BATCH_SIZE)
    for agent_id in range(num_agents):
        agents.append(
            Actor_Crtic_Agent(brain_name, agent_id, device, state_size,
                              action_size))

    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations

        for agent in agents:
            agent.reset()

        scores = np.zeros(num_agents)

        for t in range(max_t):
            actions = np.array(
                [agents[i].act(states[i]) for i in range(num_agents)])
            env_info = env.step(actions)[
                brain_name]  # send the action to the environment
            next_states = env_info.vector_observations  # get the next state
            rewards = env_info.rewards  # get the reward
            dones = env_info.local_done

            for i in range(num_agents):
                agents[i].step(states[i], actions[i], rewards[i],
                               next_states[i], dones[i], shared_memory)

            if shared_memory.batch_passed():
                agents[0].learn(shared_memory)
                agents = share_learning(agents[0].actor_local, agents)

            states = next_states
            scores += rewards
            if t % 20 == 0:  # periodic progress update
                print('\rTimestep {}\tScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'.
                      format(t, np.mean(scores), np.min(scores),
                             np.max(scores)),
                      end="")
            if np.any(dones):
                break

        score = np.mean(scores)
        scores_window.append(score)  # save most recent score
        scores_episode.append(score)

        print('\rEpisode {}\tScore: {:.2f}\tAverage Score: {:.2f}'.format(
            i_episode, score, np.mean(scores_window)),
              end="\n")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
        if np.mean(scores_window) >= 30.0:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                .format(i_episode - 100, np.mean(scores_window)))
            # torch.save(Agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            # torch.save(Agent.critic_local.state_dict(), 'checkpoint_critic.pth')
            break

    return scores_episode
Example #10
def actor_critic(agent_name,
                 multiple_agents=False,
                 load_agent=False,
                 n_episodes=300,
                 max_t=1000,
                 train_mode=True):
    """ Batch processed the states in a single forward pass with a single neural network
    Params
    ======
        multiple_agents (boolean): boolean for multiple agents
        PER (boolean): 
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
    """
    start = time.time()
    device = get_device()
    env, env_info, states, state_size, action_size, brain_name, num_agents = initialize_env(
        multiple_agents, train_mode)
    states = torch.from_numpy(states).to(device).float()

    NUM_PROCESSES = num_agents

    # Scores is Episode Rewards
    scores = np.zeros(num_agents)
    scores_window = deque(maxlen=100)
    scores_episode = []

    actor_critic = ActorCritic(state_size, action_size, device).to(device)
    agent = A2C_ACKTR(agent_name,
                      actor_critic,
                      value_loss_coef=CRITIC_DISCOUNT,
                      entropy_coef=ENTROPY_BETA,
                      lr=LEARNING_RATE,
                      eps=EPS,
                      alpha=ALPHA,
                      max_grad_norm=MAX_GRAD_NORM,
                      acktr=False,
                      load_agent=load_agent)

    rollouts = SimpleRolloutStorage(NUM_STEPS, NUM_PROCESSES, state_size,
                                    action_size)
    rollouts.to(device)

    num_updates = NUM_ENV_STEPS // NUM_STEPS // NUM_PROCESSES
    # num_updates = NUM_ENV_STEPS // NUM_STEPS

    print("\n## Loaded environment and agent in {} seconds ##\n".format(
        round((time.time() - start), 2)))

    update_start = time.time()
    timesteps = 0
    episode = 0
    if load_agent:
        episode = agent.episode
    while True:
        """CAN INSERT LR DECAY HERE"""
        # if episode == MAX_EPISODES:
        #     return scores_episode

        # Adds noise to agents parameters to encourage exploration
        # agent.add_noise(PARAMETER_NOISE)

        for step in range(NUM_STEPS):
            step_start = time.time()

            # Sample actions
            with torch.no_grad():
                values, actions, action_log_probs, _ = agent.act(states)

            clipped_actions = np.clip(actions.cpu().numpy(), *ACTION_BOUNDS)
            env_info = env.step(clipped_actions)[
                brain_name]  # send the clipped actions to the environment
            next_states = env_info.vector_observations  # get the next state
            rewards = env_info.rewards  # get the reward
            rewards_tensor = np.array(env_info.rewards)
            rewards_tensor[rewards_tensor == 0] = NEGATIVE_REWARD
            rewards_tensor = torch.from_numpy(rewards_tensor).to(
                device).float().unsqueeze(1)
            dones = env_info.local_done
            masks = torch.from_numpy(1 - np.array(dones).astype(int)).to(
                device).float().unsqueeze(1)

            rollouts.insert(states, actions, action_log_probs, values,
                            rewards_tensor, masks, masks)

            next_states = torch.from_numpy(next_states).to(device).float()
            states = next_states
            scores += rewards
            # print(rewards)

            if timesteps % 100 == 0:  # periodic progress update
                print('\rTimestep {}\tScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'.
                      format(timesteps, np.mean(scores), np.min(scores),
                             np.max(scores)),
                      end="")

            if np.any(dones):
                score = np.mean(scores)  # define the episode score here so the print and save below always have it
                print(
                    '\rEpisode {}\tScore: {:.2f}\tAverage Score: {:.2f}\tMin Score: {:.2f}\tMax Score: {:.2f}'
                    .format(episode, score, np.mean(scores_window),
                            np.min(scores), np.max(scores)),
                    end="\n")
                update_csv(agent_name, episode, np.mean(scores_window),
                           np.max(scores))

                if episode % 20 == 0:
                    agent.save_agent(agent_name,
                                     score,
                                     episode,
                                     save_history=True)
                else:
                    agent.save_agent(agent_name, score, episode)

                episode += 1
                scores = np.zeros(num_agents)
                break

            timesteps += 1

        with torch.no_grad():
            next_values, _, _, _ = agent.act(next_states)

        rollouts.compute_returns(next_values, USE_GAE, GAMMA, GAE_LAMBDA)
        agent.update(rollouts)

        score = np.mean(scores)
        scores_window.append(score)  # save most recent score
        scores_episode.append(score)

    return scores_episode