class Valuator(object):
    def __init__(self):
        # map_location forces the checkpoint onto the CPU even if it was saved from a GPU
        vals = torch.load(
            "nets/value_100K.pth",
            map_location=lambda storage, loc: storage,
        )
        self.model = Net()
        self.model.load_state_dict(vals)

    def __call__(self, s):
        # returns a scalar value estimating how good the position is
        brd = s.serialize()[None]  # [None] prepends a length-1 batch axis; the net expects batched input
        output = self.model(torch.tensor(brd).float())
        return float(output.data[0][0])
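# On the [None] above: indexing a NumPy array with None (an alias for
# np.newaxis) prepends a length-1 batch dimension. A minimal, self-contained
# shape check (the board shape below is a made-up stand-in for whatever
# s.serialize() actually returns):
import numpy as np
import torch

board = np.zeros((5, 8, 8), dtype=np.float32)   # stand-in for s.serialize()
print(board.shape)                               # (5, 8, 8)
print(board[None].shape)                         # (1, 5, 8, 8) -- a batch of one
print(torch.tensor(board[None]).float().shape)   # torch.Size([1, 5, 8, 8])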
env = make_vec_env(env_id, 'atari', 1, seed, wrapper_kwargs={
    'clip_rewards': False,
    'episode_life': False,
})
env = VecFrameStack(env, 4)

# Load a pretrained network to form \phi(s), the state features; the reward is now w^T \phi(s)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
reward_net = Net()
# map_location lets CPU-only machines load a GPU-saved checkpoint
reward_net.load_state_dict(torch.load(args.pretrained_network, map_location=device))
reward_net.to(device)

# create a policy net whose early layers are the pretrained T-REX layers
policy_net = PolicyNet(reward_net, env)
policy_net.to(device)

# run random search over the policy weights
#best_reward = random_search(reward_net, demonstrations, 40, stdev=0.01)
if not args.eval:
    best_policy, best_perf = random_search(policy_net, reward_net, args.num_rand_steps,
                                           args.rand_step_size, env, env_name,
                                           args.num_rollouts, seed)
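# random_search is defined elsewhere in this repo; as a rough, simplified
# sketch of the idea (hypothetical signature -- the real call above also
# threads through the env, reward net, env name, and rollout count):
# perturb the parameters with Gaussian noise and keep the perturbation
# only when the evaluation score improves.
import torch

def random_search_sketch(policy_net, evaluate_fn, num_steps, step_size):
    """Hill-climbing random search; evaluate_fn(policy_net) -> float."""
    best_score = evaluate_fn(policy_net)
    params = list(policy_net.parameters())
    for _ in range(num_steps):
        noise = [torch.randn_like(p) * step_size for p in params]
        with torch.no_grad():
            for p, n in zip(params, noise):
                p.add_(n)                    # try the perturbed weights
        score = evaluate_fn(policy_net)
        if score > best_score:
            best_score = score               # keep the perturbation
        else:
            with torch.no_grad():
                for p, n in zip(params, noise):
                    p.sub_(n)                # revert to the previous weights
    return policy_net, best_score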
print([a[0] for a in zip(learning_returns, demonstrations)])
demonstrations = [x for _, x in sorted(zip(learning_returns, demonstrations),
                                       key=lambda pair: pair[0])]
sorted_returns = sorted(learning_returns)
print(sorted_returns)
print("lengths")
print([len(d) for d in demonstrations])

# Load a pretrained network to form \phi(s), the state features; the reward is now w^T \phi(s)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if args.trex:
    print("using TREX network from ICML")
    reward_net = Net()
else:
    reward_net = EmbeddingNet(args.encoding_dims)
reward_net.load_state_dict(torch.load(args.pretrained_network, map_location=device))

# the reward is a linear function of the penultimate-layer features
num_features = reward_net.fc2.in_features
print("reward is linear combination of", num_features, "features")
reward_net.to(device)

# freeze all weights so there are no gradients
# (we'll manually update the last layer via proposals, so no grads are required)
for param in reward_net.parameters():
    param.requires_grad = False

# build a num_demos x (num_features + 1) numpy array (the +1 is a bias) of
# (un-discounted) feature counts from the pretrained network
directories = args.pretrained_network.split("/")  # split the path to get the last part
filename = directories[-1]  # last element should be the name of the pretrained network
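# generate_feature_counts (called later in this collection) is not shown in
# the excerpt; a plausible sketch, assuming each demonstration is a list of
# observation tensors and that a hypothetical reward_net.state_features(obs)
# method exposes the penultimate-layer activations phi(s):
import numpy as np
import torch

def generate_feature_counts_sketch(demonstrations, reward_net, device):
    """Return a (num_demos, num_features + 1) array of un-discounted feature
    counts; appending 1.0 per time step accumulates a bias count."""
    counts = []
    with torch.no_grad():
        for demo in demonstrations:
            total = np.zeros(reward_net.fc2.in_features + 1)
            for obs in demo:
                phi = reward_net.state_features(obs.to(device)).cpu().numpy().ravel()
                total += np.append(phi, 1.0)  # sum phi(s) over the trajectory
            counts.append(total)
    return np.array(counts)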
dataset = ReadDataset(csv_file)

# Split into training and test sets (80/20)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = random_split(dataset, [train_size, test_size])

# Data loaders
trainloader = DataLoader(trainset, batch_size=100, shuffle=True)
testloader = DataLoader(testset, batch_size=5_000, shuffle=False)

# Use the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Neural network
nnet = Net(dataset.__shape__()).to(device)

# Load pretrained state
nnet.load_state_dict(torch.load("output/weights0.pt"))

# Loss function
criterion = nn.BCELoss()

# Optimizer
optimizer = optim.Adam(nnet.parameters(), lr=0.0001, betas=(0.9, 0.999),
                       eps=1e-08, weight_decay=0.000001)

# Train the net
loss_per_iter = []
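# The training loop itself follows this setup; a sketch of the standard
# PyTorch pattern it feeds into (the epoch count is a made-up placeholder,
# and BCELoss assumes nnet ends in a sigmoid and takes float targets):
for epoch in range(10):  # hypothetical number of epochs
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device).float()  # BCELoss expects float targets
        optimizer.zero_grad()               # clear gradients from the last step
        outputs = nnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()                     # backpropagate
        optimizer.step()                    # Adam update
        loss_per_iter.append(loss.item())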
# sort the demonstrations by ground-truth return to simulate ranked demos
print([a[0] for a in zip(learning_returns, demonstrations)])
demonstrations = [x for _, x in sorted(zip(learning_returns, demonstrations),
                                       key=lambda pair: pair[0])]
sorted_returns = sorted(learning_returns)
print(sorted_returns)

# Load a pretrained network to form \phi(s), the state features; the reward is now w^T \phi(s)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
reward_net = Net()
reward_net.load_state_dict(torch.load(args.pretrained_network))

if not args.use_map:
    # reinitialize the last layer so it just outputs the scalar reward = w^T \phi(s)
    num_features = reward_net.fc2.in_features
    print("reward is linear combination of", num_features, "features")
    reward_net.fc2 = nn.Linear(num_features, 1)

# load the mean of the MCMC chain
burn = 1000
skip = 5
reader = open(args.chain_path)
data = []
for line in reader:
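# The loop body is truncated in this excerpt; a plausible completion,
# assuming the chain file stores one comma-separated weight vector per line:
    data.append([float(v) for v in line.strip().split(",")])

import numpy as np

chain = np.array(data)[burn::skip]  # drop burn-in samples, thin by `skip`
mean_w = chain.mean(axis=0)         # posterior mean of the reward weights w

# Hypothetical: install the mean as the linear reward layer so that the
# learned reward is w^T phi(s) (assumes the chain stores fc2's weight
# vector with the bias as the final entry):
with torch.no_grad():
    reward_net.fc2.weight.copy_(torch.from_numpy(mean_w[:-1][None, :]).float())
    reward_net.fc2.bias.copy_(torch.tensor([mean_w[-1]]).float())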
""" Live classification """ import sys import cv2 import numpy as np import torch from nnet import Net from utils import TRANSFORM, draw_game if __name__ == "__main__": VID_FEED = cv2.VideoCapture(-1) SIZE = None model = Net(5) if torch.cuda.is_available(): print('Found Cuda') model = model.cuda() classes = model.load(sys.argv[1]) i = 0 TEXT = "" with torch.no_grad(): while True: RET, FRAME = VID_FEED.read() if not RET: print("Unable to capture video") sys.exit() elif SIZE is None:
# sort the demonstrations by ground-truth return to simulate ranked demos
print([a[0] for a in zip(learning_returns, demonstrations)])
demonstrations = [x for _, x in sorted(zip(learning_returns, demonstrations),
                                       key=lambda pair: pair[0])]
sorted_returns = sorted(learning_returns)
print(sorted_returns)

# Load a pretrained network to form \phi(s), the state features; the reward is now w^T \phi(s)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
reward_net = Net()
reward_net.load_state_dict(torch.load(args.pretrained_network))

# reinitialize the last layer so it just outputs the scalar reward = w^T \phi(s)
num_features = reward_net.fc2.in_features
print("reward is linear combination of", num_features, "features")
reward_net.fc2 = nn.Linear(num_features, 1)
reward_net.to(device)

# freeze all weights so there are no gradients
# (we'll manually update the last layer via proposals, so no grads are required)
for param in reward_net.parameters():
    param.requires_grad = False

# get a num_demos x (num_features + 1) numpy array (the +1 is a bias) of
# (un-discounted) feature counts from the pretrained network
demo_cnts = generate_feature_counts(demonstrations, reward_net)
print(demo_cnts)
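# With phi(s) frozen, a demonstration's return under weights w reduces to a
# dot product with its feature counts, so evaluating proposals during MCMC
# needs no gradients or network forward passes. A sketch of the two pieces a
# proposal step needs (hypothetical helper names; the pairwise-preference
# likelihood itself is not shown in this excerpt):
import numpy as np

def demo_returns(w, demo_cnts):
    """Linear returns: w^T (sum of phi over each demo), one per demonstration."""
    return demo_cnts @ w  # (num_demos, num_features + 1) @ (num_features + 1,)

def propose(w, step_size=0.01):
    """Gaussian random-walk proposal, renormalized so that ||w|| = 1."""
    w_new = w + np.random.normal(0.0, step_size, size=w.shape)
    return w_new / np.linalg.norm(w_new)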
    validation_dataset_loader = DataLoader(val_dataset, batch_size=batch_size,
                                           num_workers=num_workers, shuffle=shuffle)
    test_dataset_loader = DataLoader(test_dataset, batch_size=batch_size,
                                     num_workers=num_workers, shuffle=shuffle)
    return training_data_loader, validation_dataset_loader, test_dataset_loader, classes


net = Net(NUM_CHANNELS)
try:
    net = net.load("../models/1573584871.3391135_12.model")
except Exception as e:
    print("NNET NOT FOUND:", str(e))

CUDA = False
if torch.cuda.is_available():
    CUDA = True
    print('Cuda found')
    device = torch.cuda.current_device()
    print(device)
    net = net.cuda()

trainloader, valloader, testloader, classes = load_dataset_from_folder(