Example #1
0
class Valuator(object):
    def __init__(self):
        vals = torch.load(
            "nets/value_100K.pth", map_location=lambda storage, loc: storage
        )  # map the checkpoint's tensors to the CPU even if the file was saved from a GPU.
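        # (equivalent, more explicit form: map_location=torch.device("cpu"))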
        self.model = Net()
        self.model.load_state_dict(vals)

    def __call__(
            self, s
    ):  # returns a scalar value estimating how good the board state s is.
        brd = s.serialize()[None]  # [None] adds a leading batch dimension (a batch of size 1), like unsqueeze(0).
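        # e.g. if serialize() returns an array of shape (C, 8, 8), brd has shape (1, C, 8, 8);
        # the exact channel count C depends on the serializer and is only an assumption here.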
        output = self.model(torch.tensor(brd).float())
        return float(output.data[0][0])
    env = make_vec_env(env_id,
                       'atari',
                       1,
                       seed,
                       wrapper_kwargs={
                           'clip_rewards': False,
                           'episode_life': False,
                       })

    env = VecFrameStack(env, 4)  # stack the last 4 observation frames so the policy can see short-term motion

    # Now we load a pretrained network to get the state features \phi(s); the reward is then r(s) = w^T \phi(s)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    reward_net = Net()
    reward_net.load_state_dict(torch.load(args.pretrained_network))
    reward_net.to(device)

    #create the policy net, initializing its early layers with the pretrained T-REX layers
    policy_net = PolicyNet(reward_net, env)
    policy_net.to(device)

    #run random search over weights
    #best_reward = random_search(reward_net, demonstrations, 40, stdev = 0.01)
    if not args.eval:
        best_policy, best_perf = random_search(policy_net, reward_net,
                                               args.num_rand_steps,
                                               args.rand_step_size, env,
                                               env_name, args.num_rollouts,
                                               seed)
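    # A minimal sketch (assumed names, not the repo's random_search) of random search over the
    # policy's last-layer weights: perturb, evaluate, keep the perturbation only if it helps.
    # evaluate_policy and policy_net.last_layer are assumptions.
    def random_search_sketch(policy_net, env, num_steps, step_size, num_rollouts):
        best_w = policy_net.last_layer.weight.data.clone()
        best_perf = evaluate_policy(policy_net, env, num_rollouts)
        for _ in range(num_steps):
            proposal = best_w + step_size * torch.randn_like(best_w)
            policy_net.last_layer.weight.data = proposal
            perf = evaluate_policy(policy_net, env, num_rollouts)
            if perf > best_perf:
                best_w, best_perf = proposal.clone(), perf
        policy_net.last_layer.weight.data = best_w
        return policy_net, best_perf
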
    #sort the demonstrations according to ground truth reward to simulate ranked demos
    print([a[0] for a in zip(learning_returns, demonstrations)])
    demonstrations = [
        x for _, x in sorted(zip(learning_returns, demonstrations),
                             key=lambda pair: pair[0])
    ]

    sorted_returns = sorted(learning_returns)
    print(sorted_returns)
    print("lengths")
    print([len(d) for d in demonstrations])


    # Now we load a pretrained network to get the state features \phi(s); the reward is then r(s) = w^T \phi(s)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.trex:
        print("using TREX network from ICML")
        reward_net = Net()
    else:
        reward_net = EmbeddingNet(args.encoding_dims)
    reward_net.load_state_dict(torch.load(args.pretrained_network, map_location=device))
    #reinitialize last layer
    num_features = reward_net.fc2.in_features

    print("reward is linear combination of ", num_features, "features")
    reward_net.to(device)
    #freeze all weights so there are no gradients (we'll manually update the last layer via proposals so no grads required)
    for param in reward_net.parameters():
        param.requires_grad = False

    #get a (num_demos, num_features + 1) numpy array of (un-discounted) feature counts from the pretrained network; the +1 column is the bias
    directories = args.pretrained_network.split("/")  #split the path to get its last part
    filename = directories[-1] #last element should be the name of the pretrained network
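    # A minimal sketch (assumed names, not this repo's code) of the linear reward model described
    # in the comments above: the frozen network supplies \phi(s) and a weight vector w gives
    # r(s) = w^T \phi(s). reward_net.state_features is an assumed feature-extraction method.
    def linear_reward_sketch(reward_net, obs, w):
        with torch.no_grad():
            phi = reward_net.state_features(obs)  # \phi(s) for a batch of observations
        return torch.matmul(phi, w)               # r(s) = w^T \phi(s) for each observation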
Example #4
0
dataset = ReadDataset(csv_file)

# Split into training and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = random_split(dataset, [train_size, test_size])

# Data loaders
trainloader = DataLoader(trainset, batch_size=100, shuffle=True)
testloader = DataLoader(testset, batch_size=5_000, shuffle=False)

# Use gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Neural Network
nnet = Net(dataset.__shape__()).to(device)
## Load pretrained state
nnet.load_state_dict(torch.load("output/weights0.pt", map_location=device))  # map weights to the available device

# Loss function
criterion = nn.BCELoss()

# Optimizer
optimizer = optim.Adam(nnet.parameters(),
                       lr=0.0001,
                       betas=(0.9, 0.999),
                       eps=1e-08,
                       weight_decay=0.000001)

# Train the net
loss_per_iter = []
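# The snippet ends just before the loop itself; a minimal training-loop sketch that fits the
# pieces above (the epoch count is an assumption, and it assumes the dataset yields
# (inputs, labels) batches with labels shaped like the net's sigmoid output):
for epoch in range(10):
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device).float(), labels.to(device).float()
        optimizer.zero_grad()
        outputs = nnet(inputs)
        loss = criterion(outputs, labels)  # BCELoss expects probabilities in [0, 1]
        loss.backward()
        optimizer.step()
        loss_per_iter.append(loss.item())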
Example #5
0
    def __init__(self):
        vals = torch.load(
            "nets/value_100K.pth", map_location=lambda storage, loc: storage
        )  # map the checkpoint's tensors to the CPU even if the file was saved from a GPU.
        self.model = Net()
        self.model.load_state_dict(vals)
Example #6
0
    #sort the demonstrations according to ground truth reward to simulate ranked demos

    print([a[0] for a in zip(learning_returns, demonstrations)])
    demonstrations = [
        x for _, x in sorted(zip(learning_returns, demonstrations),
                             key=lambda pair: pair[0])
    ]

    sorted_returns = sorted(learning_returns)
    print(sorted_returns)

    # Now we load a pretrained network to get the state features \phi(s); the reward is then r(s) = w^T \phi(s)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    reward_net = Net()
    reward_net.load_state_dict(torch.load(args.pretrained_network))
    if not args.use_map:
        #reinitialize last layer
        num_features = reward_net.fc2.in_features

        print("reward is linear combination of ", num_features, "features")
        reward_net.fc2 = nn.Linear(
            num_features,
            1)  #last layer just outputs the scalar reward = w^T \phi(s)
        #load the mean of the MCMC chain
        burn = 1000
        skip = 5
        reader = open(args.chain_path)
        data = []
        for line in reader:
Example #7
0
"""
Live classification
"""
import sys

import cv2
import numpy as np
import torch

from nnet import Net
from utils import TRANSFORM, draw_game

if __name__ == "__main__":
    VID_FEED = cv2.VideoCapture(-1)
    SIZE = None
    model = Net(5)
    if torch.cuda.is_available():
        print('Found Cuda')
        model = model.cuda()

    classes = model.load(sys.argv[1])

    i = 0
    TEXT = ""
    with torch.no_grad():
        while True:
            RET, FRAME = VID_FEED.read()
            if not RET:
                print("Unable to capture video")
                sys.exit()
            elif SIZE is None:
Example #8
0
    #sort the demonstrations according to ground truth reward to simulate ranked demos

    print([a[0] for a in zip(learning_returns, demonstrations)])
    demonstrations = [
        x for _, x in sorted(zip(learning_returns, demonstrations),
                             key=lambda pair: pair[0])
    ]

    sorted_returns = sorted(learning_returns)
    print(sorted_returns)

    # Now we load a pretrained network to get the state features \phi(s); the reward is then r(s) = w^T \phi(s)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    reward_net = Net()
    reward_net.load_state_dict(torch.load(args.pretrained_network))
    #reinitialize last layer
    num_features = reward_net.fc2.in_features

    print("reward is linear combination of ", num_features, "features")
    reward_net.fc2 = nn.Linear(
        num_features,
        1)  #last layer just outputs the scalar reward = w^T \phi(s)
    reward_net.to(device)
    #freeze all weights so there are no gradients (we'll manually update the last layer via proposals so no grads required)
    for param in reward_net.parameters():
        param.requires_grad = False
    #get a (num_demos, num_features + 1) numpy array of (un-discounted) feature counts from the pretrained network; the +1 column is the bias
    demo_cnts = generate_feature_counts(demonstrations, reward_net)
    print(demo_cnts)
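    # A sketch (not the repo's generate_feature_counts) of how the (num_demos, num_features + 1)
    # count matrix could be built. reward_net.state_features is an assumed name for the frozen
    # feature extractor \phi(s), and numpy is assumed to be imported as np.
    def feature_counts_sketch(demos, reward_net):
        cnts = []
        with torch.no_grad():
            for demo in demos:
                obs = torch.tensor(np.array(demo)).float().to(device)
                phi = reward_net.state_features(obs)   # \phi(s) for every frame of the demo
                fcount = phi.sum(dim=0).cpu().numpy()  # un-discounted feature counts
                cnts.append(np.append(fcount, 1.0))    # trailing 1.0 acts as the bias feature
        return np.array(cnts)                          # shape: (num_demos, num_features + 1)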
Example #9
0
    validation_dataset_loader = DataLoader(val_dataset,
                                           batch_size=batch_size,
                                           num_workers=num_workers,
                                           shuffle=shuffle)

    test_dataset_loader = DataLoader(test_dataset,
                                     batch_size=batch_size,
                                     num_workers=num_workers,
                                     shuffle=shuffle)
    return training_data_loader, validation_dataset_loader, test_dataset_loader, classes


# In[4]:

net = Net(NUM_CHANNELS)
try:
    net = net.load("../models/1573584871.3391135_12.model")
except Exception as e:
    print("NNET NOT FOUND: ", str(e))

CUDA = False
if torch.cuda.is_available():
    CUDA = True
    print('Cuda found')
    device = torch.cuda.current_device()
    print(device)
    net = net.cuda()

trainloader, valloader, testloader, classes = load_dataset_from_folder(