def fit(self, texts_seq, texts_labels, epochs, batch_size, model=None):
    if model is None:
        preprocess = self.preprocess
        model = CNN(preprocess.num_words, preprocess.sentence_len, 128,
                    len(preprocess.label_set))
    model.fit(texts_seq, texts_labels, epochs=epochs, batch_size=batch_size)
    self.model = model
def main(config):
    word2vec_model = gensim.models.Word2Vec.load(config.pretrained_word_vector)
    word2vec_model.wv["<pad>"] = np.zeros(word2vec_model.wv.vector_size)
    word2vec_model.wv["<unk>"] = np.zeros(word2vec_model.wv.vector_size)
    preprocessor = Preprocessor(word2vec_model)
    train_dataloader = get_dataloader(config.train_data, config.max_len,
                                      preprocessor, config.batch_size)
    val_dataloader = get_dataloader(config.val_data, config.max_len,
                                    preprocessor, config.batch_size)
    logger = TensorBoardLogger(config.log_dir, config.cnn_type, config.task)
    model_checkpoint = ModelCheckpoint(
        dirpath=f"checkpoint/{config.cnn_type}/{config.task}",
        filename="cnn-{epoch:02d}-{val_loss:.5f}",
        save_top_k=-1,
    )
    early_stopping = EarlyStopping("val_loss")
    net = CNN(word2vec_model.wv, config)
    trainer = pl.Trainer(
        distributed_backend=config.distributed_backend,
        gpus=config.gpus,
        max_epochs=20,
        logger=logger,
        callbacks=[model_checkpoint, early_stopping],
    )
    trainer.fit(net, train_dataloader, val_dataloader)
def __init__(self, envs):
    self.value_loss_coefficient = 0.5
    self.entropy_coefficient = 0.05
    self.learning_rate = 1e-4
    self.envs = envs
    self.processor = Preprocessor(self.envs.observation_spec()[0])
    self.sum_score = 0
    self.last_score = 0
    self.n_steps = 8
    self.gamma = 0.99
    self.sum_episode = 0
    self.total_updates = -1
    self.net = CNN().cuda()
    self.optimizer = optim.Adam(self.net.parameters(), self.learning_rate,
                                weight_decay=0.01)
def main(config):
    word2vec_model = gensim.models.Word2Vec.load(config.pretrained_word_vector)
    word2vec_model.wv["<pad>"] = np.zeros(word2vec_model.wv.vector_size)
    word2vec_model.wv["<unk>"] = np.zeros(word2vec_model.wv.vector_size)
    preprocessor = Preprocessor(word2vec_model)
    test_dataloader = get_dataloader(
        config.test_data, config.max_len, preprocessor, config.batch_size
    )
    net = CNN(word2vec_model.wv, config)
    checkpoint = torch.load(config.ckpt_path)
    net.load_state_dict(checkpoint["state_dict"])
    trainer = pl.Trainer(
        distributed_backend=config.distributed_backend,
        gpus=config.gpus,
    )
    res = trainer.test(net, test_dataloader)
def predict(**kwargs):
    model = kwargs.get('model', 1)
    if model == 1:
        outputsize = 8
    elif model == 2:
        outputsize = 22
    cnn = CNN(outputsize=outputsize)
    with tf.Graph().as_default():
        dmgr = DataManager(dir='./data/test.tfrecords', batchsize=1,
                           test=True, model=model)
        evaler = Solver(data=dmgr, net=cnn, batchsize=1, test=True, model=model)
        y, x = evaler.predict()
    return y, x
def main():
    parser = argparse.ArgumentParser(description='regression of kWh')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    model = CNN()
    # load model
    chainer.serializers.load_npz(args.out + '/' + g_model_filename, model)
    print_predict(model, args.out)
def train(**kwargs):
    model = kwargs.get('model', 1)
    if model == 1:
        outputsize = 8
        tfpath = './data/train1'
    elif model == 2:
        outputsize = 22
        tfpath = './data/train2'
    cnn = CNN(outputsize=outputsize)
    with tf.Graph().as_default():
        dmgr = DataManager(dir=tfpath, model=model, count=2000, batchsize=200)
        trainer = Solver(data=dmgr, net=cnn, model=model, maxiter=2000, lr=0.001)
        print("Start training model ", model, " : ")
        trainer.train()
        print("Complete training model ", model)
def main(args):
    # Build the models
    encoder = CNN(args.hidden_size)
    encoder.eval()  # evaluation mode (BN uses moving mean/variance)
    decoder = LSTM(args.embed_size, args.hidden_size, len(vocab), args.num_layers)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # Load the data set
    is_training = True
    testing_data = IDDataset(not is_training)

    # Move to GPU if available
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    test_acc = evaluation(testing_data, encoder, decoder)
    print("Accuracy is %.4f" % test_acc)
import torch
from net import MLP, CNN
from torchvision import datasets, transforms
from sklearn.metrics import multilabel_confusion_matrix

test_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST(
        './fashionmnist_data/', train=False,
        transform=transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.1307, ), (0.3081, ))])),
    batch_size=1, shuffle=True)

model = CNN()
device = torch.device('cpu')
model = model.to(device)
model.load_state_dict(torch.load('output/CNN.pt'))
model.eval()

pres = []
labels = []
i = 0
for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    pres.append(pred[0][0].item())
    labels.append(target[0].item())

mcm = multilabel_confusion_matrix(labels, pres)
print(mcm)
# data
train_dataset = WindowedData("/Users/fdhcg/Desktop/clshen/data/test.txt", WSIZE)
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataset = WindowedData("/Users/fdhcg/Desktop/clshen/data/1998.txt", WSIZE)
test_loader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

cnn = CNN().cuda()

# optimizer
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR, weight_decay=1)
loss_fun = Myloss().cuda()

# training loop
for epoch in range(EPOCH):
    for i, (x, y) in enumerate(train_loader):
        batch_x = Variable(x).cuda()
        batch_y = Variable(y).cuda()
        # feed the training data
        output = cnn(batch_x)
        # compute the loss
        loss = loss_fun(output, batch_y)
        # clear the gradients from the previous step
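# Illustrative only: the snippet above is truncated right after the "clear the
# gradients from the previous step" comment. The sketch below shows the usual
# zero_grad / backward / step sequence on a stand-in model and random data;
# names such as `toy_model` and `toy_optimizer` are assumptions made up for
# this example, not part of the original code.
import torch
import torch.nn as nn

toy_model = nn.Linear(4, 2)                       # stand-in for the CNN
toy_optimizer = torch.optim.Adam(toy_model.parameters(), lr=1e-3)
toy_loss_fun = nn.MSELoss()

x = torch.randn(8, 4)
y = torch.randn(8, 2)

output = toy_model(x)                             # forward pass
loss = toy_loss_fun(output, y)                    # compute the loss
toy_optimizer.zero_grad()                         # clear gradients from the previous step
loss.backward()                                   # backpropagate
toy_optimizer.step()                              # update the parameters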
key = folder
key += args.net \
    + '_hdim' + str(args.hdim) \
    + '_Batchsize' + str(args.Batchsize) \
    + '_lr' + str(args.lr) \
    + '_Nsteps' + str(args.Nsteps) \
    + '_epsilon' + str(args.epsilon)

cmd = ['mkdir', '-p', key]
subprocess.check_call(cmd)

if args.net == 'MLP':
    net = MLP(dim=dim, hidden_size=args.hdim, use_z2=False)
elif args.net == 'CNN':
    net = CNN(L=length, channel=channel, hidden_size=args.hdim, use_z2=False)
elif args.net == 'Simple_MLP':
    net = Simple_MLP(dim=dim, hidden_size=args.hdim, use_z2=False)
else:
    print('what network ?', args.net)
    sys.exit(1)

model = MongeAmpereFlow(net, args.epsilon, args.Nsteps, device=device, name=key)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)

if args.checkpoint is not None:
    try:
        load_checkpoint(args.checkpoint, model, optimizer)
        print('load checkpoint', args.checkpoint)
    except FileNotFoundError:
        if self.boardnorm:
            game.normalize()
        return game.board, game.movability


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--boardnorm', action='store_true')
    parser.add_argument('--out', default='result')
    args = parser.parse_args()

    model = MultiLabelClassifier(CNN())
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    dataset = Dataset(2048, args.boardnorm)
    iter_ = chainer.iterators.SerialIterator(dataset, args.batchsize)
    print(
        'chance rate: ',
        sum(dataset[i][1].mean() for i in range(len(dataset))) / len(dataset))
window_adv = 'adversarial image'
cv2.namedWindow(window_adv, cv2.WINDOW_FREERATIO)
cv2.createTrackbar('eps', window_adv, 1, 255, nothing)

orig = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# orig = cv2.resize(orig, (IMG_SIZE, IMG_SIZE))
img = orig.copy().astype(np.float32)
perturbation = np.empty_like(orig)

mean = [0.5]
std = [0.5]
img /= 255.0
img = (img - mean) / std

# load model
model1 = CNN(1, 10)
saved1 = torch.load('relu.pkl', map_location='cpu')
model1.load_state_dict(saved1)
model1.eval()

criterion = nn.CrossEntropyLoss()
device = 'cuda' if gpu else 'cpu'

# prediction before attack
inp = Variable(
    torch.from_numpy(img).to(device).float().unsqueeze(0).unsqueeze(0),
    requires_grad=True)
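# Illustrative only: the snippet above prepares an image, a CNN and an 'eps'
# trackbar but stops before the attack itself. The sketch below shows one plain
# FGSM step (perturb the input along the sign of the input gradient), which is
# the kind of attack such a setup is usually driving toward; `fgsm_step`,
# `toy_net` and the tensors are assumptions, not part of the original code.
import torch
import torch.nn as nn


def fgsm_step(model, x, target, eps):
    """Return an adversarial copy of x using the fast gradient sign method."""
    x_adv = x.clone().detach().requires_grad_(True)
    loss = nn.CrossEntropyLoss()(model(x_adv), target)
    loss.backward()
    return (x_adv + eps * x_adv.grad.sign()).detach()


toy_net = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))  # stand-in model
x = torch.randn(1, 1, 28, 28)
target = torch.tensor([3])
x_adv = fgsm_step(toy_net, x, target, eps=0.1)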
class PPO():
    def __init__(self, envs):
        self.value_loss_coefficient = 0.5
        self.entropy_coefficient = 0.05
        self.learning_rate = 1e-4
        self.envs = envs
        self.env_num = 8
        self.processor = Preprocessor(self.envs.observation_spec()[0])
        self.sum_score = 0
        self.n_steps = 512
        self.gamma = 0.999
        self.clip = 0.27
        self.sum_episode = 0
        self.total_updates = -1
        self.net = CNN().cuda()
        self.old_net = copy.deepcopy(self.net)
        self.old_net.cuda()
        self.epoch = 4
        self.batch_size = 8
        self.optimizer = optim.Adam(self.net.parameters(), self.learning_rate,
                                    weight_decay=0.01)

    def reset(self):
        self.obs_start = self.envs.reset()
        self.last_obs = self.processor.preprocess_obs(self.obs_start)

    def grad_step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        policy, value = self.net(screen, minimap, flat)
        return policy, value

    def step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            policy, value = self.net(screen, minimap, flat)
        return policy, value

    def old_step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            policy, value = self.old_net(screen, minimap, flat)
        return policy, value

    def select_actions(self, policy, last_obs):
        available_actions = last_obs['available_actions']

        def sample(prob):
            actions = Categorical(prob).sample()
            return actions

        function_pi, args_pi = policy
        available_actions = torch.FloatTensor(available_actions)
        function_pi = available_actions * function_pi.cpu()
        function_pi /= torch.sum(function_pi, dim=1, keepdim=True)
        try:
            function_sample = sample(function_pi)
        except:
            return 0
        args_sample = dict()
        for type, pi in args_pi.items():
            if type.name == 'queued':
                args_sample[type] = torch.zeros((self.env_num,), dtype=int)
            else:
                args_sample[type] = sample(pi).cpu()
        return function_sample, args_sample

    def mask_unused_action(self, actions):
        fn_id, arg_ids = actions
        for n in range(fn_id.shape[0]):
            a_0 = fn_id[n]
            unused_types = set(ACTION_TYPES) - \
                set(FUNCTIONS._func_list[a_0].args)
            for arg_type in unused_types:
                arg_ids[arg_type][n] = -1
        return (fn_id, arg_ids)

    def functioncall_action(self, actions, size):
        height, width = size
        fn_id, arg_ids = actions
        fn_id = fn_id.numpy().tolist()
        actions_list = []
        for n in range(len(fn_id)):
            a_0 = fn_id[n]
            a_l = []
            for arg_type in FUNCTIONS._func_list[a_0].args:
                arg_id = arg_ids[arg_type][n].detach().numpy().squeeze().tolist()
                if is_spatial_action[arg_type]:
                    arg = [arg_id % width, arg_id // height]
                else:
                    arg = [arg_id]
                a_l.append(arg)
            action = FunctionCall(a_0, a_l)
            actions_list.append(action)
        return actions_list

    def get_value(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            _, value = self.net(screen, minimap, flat)
        return value

    def train(self):
        obs_raw = self.obs_start
        shape = (self.n_steps, self.envs.n_envs)
        sample_values = np.zeros(shape, dtype=np.float32)
        sample_obersavation = []
        sample_rewards = np.zeros(shape, dtype=np.float32)
        sample_actions = []
        sample_dones = np.zeros(shape, dtype=np.float32)
        scores = []
        last_obs = self.last_obs
        for step in range(self.n_steps):
            policy, value = self.step(last_obs)
            actions = self.select_actions(policy, last_obs)
            if actions == 0:
                self.sum_episode = 7
                self.sum_score = 0
                return
            actions = self.mask_unused_action(actions)
            size = last_obs['screen'].shape[2:4]
            sample_values[step, :] = value.cpu()
            sample_obersavation.append(last_obs)
            sample_actions.append(actions)
            pysc2_action = self.functioncall_action(actions, size)
            '''fn_id, args_id = actions
            if fn_id[0].cpu().numpy().squeeze() in obs_raw[0].observation['available_actions']:
                print('1,True')
            else:
                print('1.False'), printoobs_info(obs_raw[0])
            if fn_id[1].cpu().numpy().squeeze() in obs_raw[1].observation['available_actions']:
                print('2,True')
            else:
                print('2.False'), printoobs_info(obs_raw[1])
            print(last_obs['available_actions'][0][fn_id[0]],
                  last_obs['available_actions'][1][fn_id[1]], fn_id)'''
            obs_raw = self.envs.step(pysc2_action)
            # print("0:", pysc2_action[0].function)
            # print("1:", pysc2_action[1].function)
            last_obs = self.processor.preprocess_obs(obs_raw)
            sample_rewards[step, :] = [i.reward for i in obs_raw]
            sample_dones[step, :] = [i.last() for i in obs_raw]
            for i in obs_raw:
                if i.last():
                    score = i.observation['score_cumulative'][0]
                    self.sum_score += score
                    self.sum_episode += 1
                    print("episode %d: score = %f" % (self.sum_episode, score))
                    # if self.sum_episode % 10 == 0:
                    #     torch.save(self.net.state_dict(), './save/episode' +
                    #                str(self.sum_episode) + '_score' + str(score) + '.pkl')
        self.last_obs = last_obs
        next_value = self.get_value(last_obs).cpu()
        returns = np.zeros([sample_rewards.shape[0] + 1, sample_rewards.shape[1]])
        returns[-1, :] = next_value
        for i in reversed(range(sample_rewards.shape[0])):
            next_rewards = self.gamma * returns[i + 1, :] * (1 - sample_dones[i, :])
            returns[i, :] = sample_rewards[i, :] + next_rewards
        returns = returns[:-1, :]
        advantages = returns - sample_values
        self.old_net.load_state_dict(self.net.state_dict())
        actions = stack_and_flatten_actions(sample_actions)
        observation = flatten_first_dims_dict(
            stack_ndarray_dicts(sample_obersavation))
        returns = flatten_first_dims(returns)
        advantages = flatten_first_dims(advantages)
        self.learn(observation, actions, returns, advantages)

    def learn(self, observation, actions, returns, advantages):
        temp = np.arange(returns.shape[0])
        minibatch = returns.shape[0] // self.batch_size
        screen = observation['screen']
        flat = observation['flat']
        minimap = observation['minimap']
        a_actions = observation['available_actions']
        args_id = actions[1]
        for _ in range(self.epoch):
            np.random.shuffle(temp)
            for i in range(0, returns.shape[0], minibatch):
                j = i + minibatch
                shuffle = temp[i:j]
                batch_screen = screen[shuffle]
                batch_minimap = minimap[shuffle]
                batch_flat = flat[shuffle]
                batch_a_actions = a_actions[shuffle]
                batch_observation = {'screen': batch_screen,
                                     'minimap': batch_minimap,
                                     'flat': batch_flat,
                                     'available_actions': batch_a_actions}
                batch_advantages = advantages[shuffle]
                batch_fn_id = actions[0][shuffle]
                batch_args_id = {k: v[shuffle] for k, v in args_id.items()}
                batch_actions = (batch_fn_id, batch_args_id)
                batch_returns = returns[shuffle]
                batch_advantages = torch.FloatTensor(batch_advantages).cuda()
                batch_returns = torch.FloatTensor(batch_returns).cuda()
                batch_advantages = (batch_advantages - batch_advantages.mean()) / \
                    (batch_advantages.std() + 1e-8)
                policy, batch_value = self.grad_step(batch_observation)
                log_probs = compute_policy_log_probs(
                    batch_observation['available_actions'], policy,
                    batch_actions).squeeze()
                old_policy, _ = self.old_step(batch_observation)
                old_log_probs = compute_policy_log_probs(
                    batch_observation['available_actions'], old_policy,
                    batch_actions).squeeze().detach()
                ratio = torch.exp(log_probs - old_log_probs)
                temp1 = ratio * batch_advantages
                temp2 = torch.clamp(ratio, 1 - self.clip, 1 + self.clip) * batch_advantages
                policy_loss = -torch.min(temp1, temp2).mean()
                value_loss = (batch_returns - batch_value).pow(2).mean()
                entropy_loss = compute_policy_entropy(
                    batch_observation['available_actions'], policy, batch_actions)
                loss = policy_loss + value_loss * self.value_loss_coefficient + \
                    entropy_loss * self.entropy_coefficient
                # loss = loss.requires_grad_()
                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.0)
                self.optimizer.step()
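# Illustrative only: a minimal, self-contained sketch of the clipped PPO
# surrogate used in PPO.learn() above (ratio = exp(new_logp - old_logp),
# advantage-weighted objective clipped to [1 - clip, 1 + clip]). The tensor
# values and the helper name `ppo_clipped_loss` are assumptions made up for
# this example, not part of the original code.
import torch


def ppo_clipped_loss(log_probs, old_log_probs, advantages, clip=0.27):
    ratio = torch.exp(log_probs - old_log_probs)
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1 - clip, 1 + clip) * advantages
    # negative because the surrogate is maximized by minimizing this loss
    return -torch.min(unclipped, clipped).mean()


log_probs = torch.log(torch.tensor([0.30, 0.60, 0.20]))
old_log_probs = torch.log(torch.tensor([0.25, 0.50, 0.40]))
advantages = torch.tensor([1.0, -0.5, 2.0])
print(ppo_clipped_loss(log_probs, old_log_probs, advantages))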
class A2C():
    def __init__(self, envs, args):
        self.value_loss_coefficient = args.value_loss_weight
        self.entropy_coefficient = args.entropy_weight
        self.learning_rate = args.lr
        self.envs = envs
        self.map = args.map
        self.env_num = args.envs
        self.save = args.save_eposides
        self.save_dir = args.save_dir
        self.processor = Preprocessor(self.envs.observation_spec()[0],
                                      self.map, args.process_screen)
        self.sum_score = 0
        self.n_steps = 8
        self.gamma = 0.999
        self.sum_episode = 0
        self.total_updates = -1
        if args.process_screen:
            self.net = CNN(348, 1985).cuda()
        else:
            self.net = CNN().cuda()
        self.optimizer = optim.Adam(self.net.parameters(), self.learning_rate,
                                    weight_decay=0.01)

    def reset(self):
        self.obs_start = self.envs.reset()
        self.last_obs = self.processor.preprocess_obs(self.obs_start)

    def grad_step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        policy, value = self.net(screen, minimap, flat)
        return policy, value

    def step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            policy, value = self.net(screen, minimap, flat)
        return policy, value

    def select_actions(self, policy, last_obs):
        available_actions = last_obs['available_actions']

        def sample(prob):
            actions = Categorical(prob).sample()
            return actions

        function_pi, args_pi = policy
        available_actions = torch.FloatTensor(available_actions)
        function_pi = available_actions * function_pi.cpu()
        function_pi /= torch.sum(function_pi, dim=1, keepdim=True)
        try:
            function_sample = sample(function_pi)
        except:
            return 0
        args_sample = dict()
        for type, pi in args_pi.items():
            if type.name == 'queued':
                args_sample[type] = torch.zeros((self.env_num, ), dtype=int)
            else:
                args_sample[type] = sample(pi).cpu()
        return function_sample, args_sample

    def determined_actions(self, policy, last_obs):
        available_actions = last_obs['available_actions']

        def sample(prob):
            actions = torch.argmax(prob, dim=1)
            return actions

        function_pi, args_pi = policy
        available_actions = torch.FloatTensor(available_actions)
        function_pi = available_actions * function_pi.cpu()
        function_pi /= torch.sum(function_pi, dim=1, keepdim=True)
        try:
            function_sample = sample(function_pi)
        except:
            return 0
        args_sample = dict()
        for type, pi in args_pi.items():
            if type.name == 'queued':
                args_sample[type] = torch.zeros((self.env_num, ), dtype=int)
            else:
                args_sample[type] = sample(pi).cpu()
        return function_sample, args_sample

    def mask_unused_action(self, actions):
        fn_id, arg_ids = actions
        for n in range(fn_id.shape[0]):
            a_0 = fn_id[n]
            unused_types = set(ACTION_TYPES) - \
                set(FUNCTIONS._func_list[a_0].args)
            for arg_type in unused_types:
                arg_ids[arg_type][n] = -1
        return (fn_id, arg_ids)

    def functioncall_action(self, actions, size):
        height, width = size
        fn_id, arg_ids = actions
        fn_id = fn_id.numpy().tolist()
        actions_list = []
        for n in range(len(fn_id)):
            a_0 = fn_id[n]
            a_l = []
            for arg_type in FUNCTIONS._func_list[a_0].args:
                arg_id = arg_ids[arg_type][n].detach().numpy().squeeze().tolist()
                if is_spatial_action[arg_type]:
                    arg = [arg_id % width, arg_id // height]
                else:
                    arg = [arg_id]
                a_l.append(arg)
            action = FunctionCall(a_0, a_l)
            actions_list.append(action)
        return actions_list

    def get_value(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            _, value = self.net(screen, minimap, flat)
        return value

    def train(self):
        obs_raw = self.obs_start
        shape = (self.n_steps, self.envs.n_envs)
        sample_values = np.zeros(shape, dtype=np.float32)
        sample_obersavation = []
        sample_rewards = np.zeros(shape, dtype=np.float32)
        sample_actions = []
        sample_dones = np.zeros(shape, dtype=np.float32)
        scores = []
        last_obs = self.last_obs
        for step in range(self.n_steps):
            policy, value = self.step(last_obs)
            actions = self.select_actions(policy, last_obs)
            if actions == 0:
                self.sum_episode = 7
                self.sum_score = 0
                return
            actions = self.mask_unused_action(actions)
            size = last_obs['screen'].shape[2:4]
            sample_values[step, :] = value.cpu()
            sample_obersavation.append(last_obs)
            sample_actions.append(actions)
            pysc2_action = self.functioncall_action(actions, size)
            obs_raw = self.envs.step(pysc2_action)
            # print("0:", pysc2_action[0].function)
            # print("1:", pysc2_action[1].function)
            last_obs = self.processor.preprocess_obs(obs_raw)
            sample_rewards[step, :] = [1 if i.reward else -0.1 for i in obs_raw]
            sample_dones[step, :] = [i.last() for i in obs_raw]
            for i in obs_raw:
                if i.last():
                    score = i.observation['score_cumulative'][0]
                    self.sum_score += score
                    self.sum_episode += 1
                    print("episode %d: score = %f" % (self.sum_episode, score))
                    if self.sum_episode % self.save == 0:
                        torch.save(
                            self.net.state_dict(),
                            self.save_dir + '/' + str(self.sum_episode) +
                            '_score' + str(score) + '.pkl')
        self.last_obs = last_obs
        next_value = self.get_value(last_obs).cpu()
        returns = np.zeros([sample_rewards.shape[0] + 1, sample_rewards.shape[1]])
        returns[-1, :] = next_value
        for i in reversed(range(sample_rewards.shape[0])):
            next_rewards = self.gamma * returns[i + 1, :] * (1 - sample_dones[i, :])
            returns[i, :] = sample_rewards[i, :] + next_rewards
        returns = returns[:-1, :]
        advantages = returns - sample_values
        actions = stack_and_flatten_actions(sample_actions)
        observation = flatten_first_dims_dict(
            stack_ndarray_dicts(sample_obersavation))
        returns = flatten_first_dims(returns)
        advantages = flatten_first_dims(advantages)
        self.learn(observation, actions, returns, advantages)

    def learn(self, observation, actions, returns, advantages):
        advantages = torch.FloatTensor(advantages).cuda()
        returns = torch.FloatTensor(returns).cuda()
        policy, value = self.grad_step(observation)
        log_probs = compute_policy_log_probs(observation['available_actions'],
                                             policy, actions).squeeze()
        policy_loss = -(log_probs * advantages).mean()
        value_loss = (returns - value).pow(2).mean()
        entropy_loss = compute_policy_entropy(observation['available_actions'],
                                              policy, actions)
        loss = policy_loss + value_loss * self.value_loss_coefficient + \
            entropy_loss * self.entropy_coefficient
        # print(loss)
        # loss = loss.requires_grad_()
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.0)
        self.optimizer.step()
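# Illustrative only: a minimal NumPy sketch of the n-step bootstrapped return
# used in A2C.train() and PPO.train() above (discount backwards from the
# bootstrap value, zeroing the future term at episode boundaries). The helper
# name `n_step_returns` and the array values are made up for this example.
import numpy as np


def n_step_returns(rewards, dones, next_value, gamma=0.999):
    """rewards, dones: arrays of shape (n_steps, n_envs); next_value: (n_envs,)."""
    returns = np.zeros((rewards.shape[0] + 1, rewards.shape[1]))
    returns[-1, :] = next_value  # bootstrap from the value of the last observation
    for i in reversed(range(rewards.shape[0])):
        returns[i, :] = rewards[i, :] + gamma * returns[i + 1, :] * (1 - dones[i, :])
    return returns[:-1, :]


rewards = np.array([[0.0], [1.0], [0.0]])
dones = np.array([[0.0], [0.0], [1.0]])  # episode ends at the last step
print(n_step_returns(rewards, dones, next_value=np.array([0.5])))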
def main():
    parser = argparse.ArgumentParser(description='pytorch example: MNIST')
    parser.add_argument('--batch', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--model', '-m', default='model.pth',
                        help='Network Model')
    args = parser.parse_args()
    batch_size = args.batch

    print('show training log')
    df = pd.read_csv('train.log')
    plt.plot(df['epoch'], df['train/accuracy'], label='train/acc.', marker="o")
    plt.plot(df['epoch'], df['test/accuracy'], label='test/acc.', marker="o")
    plt.legend(loc='lower right')
    plt.ylim([0.8, 1.0])
    plt.savefig('accuracy.png')
    plt.show()

    transform = transforms.Compose([
        transforms.ToTensor(),  # transform to torch.Tensor
        transforms.Normalize(mean=(0.5, ), std=(0.5, ))
    ])
    trainset = torchvision.datasets.CIFAR10(root='../cifar10_root', train=True,
                                            download=True, transform=transform)
    testset = torchvision.datasets.CIFAR10(root='../cifar10_root', train=False,
                                           download=True, transform=transform)
    dataset = trainset + testset
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=False, num_workers=2)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('device:', device)

    # Load & Predict Test
    param = torch.load('model.pth')
    net = CNN()  # the class must be instantiated before the weights are loaded
    net.to(device)  # for GPU
    net.load_state_dict(param)

    true_list = []
    pred_list = []
    with torch.no_grad():
        for data in dataloader:
            images, labels = data
            true_list.extend(labels.tolist())
            images, labels = images.to(device), labels.to(device)  # for GPU
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            pred_list.extend(predicted.tolist())

    acc = accuracy_score(true_list, pred_list)
    print('Predict... all data acc.: {:.3f}'.format(acc))

    confmat = confusion_matrix(y_true=true_list, y_pred=pred_list)
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.matshow(confmat, cmap=plt.cm.Purples, alpha=0.8)
    for i in range(confmat.shape[0]):
        for j in range(confmat.shape[1]):
            if confmat[i, j] > 0:
                ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center')
    plt.xlabel('predicted label')
    plt.ylabel('true label')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    plt.show()
g_clip = args.g_clip
pad = nn_type == "cnn"

if __name__ == "__main__":
    set_seed()
    log_tracer = LogTracer(nn_type, sep_mode)
    log_tracer("get train data")
    train, test, n_vocab = get_train_data(pad, sep_mode)
    log_tracer.trace_label("train", train)
    log_tracer.trace_label("test", test)

    if nn_type == "lstm":
        mlp = LSTM(n_vocab, n_units, N_OUT)
    elif nn_type == "cnn":
        mlp = CNN(n_vocab, n_units, N_OUT)

    opt = optimizers.Adam()
    opt.setup(mlp)
    opt.add_hook(optimizer.WeightDecay(w_decay))
    opt.add_hook(optimizer.GradientClipping(g_clip))

    log_tracer("start train")
    for epoch in range(n_epoch):
        for x, t in generate_bath(train, n_batch):
            mlp.cleargrads()
            loss, acc = mlp(x, t, train=True)
            loss.backward()
            opt.update()
            log_tracer.trace_train(epoch, loss.data, acc.data)
        x_v, t_v = parse_batch(test)
        loss_v, acc_v = mlp(x_v, t_v)
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model_cnn = CNN()
    model = L.Classifier(model_cnn)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    stop_trigger = triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss',
        max_trigger=(args.epoch, 'epoch'),
        verbose=True)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(filename='snapshot_current'),
                   trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Report predictions
    trainer.extend(report_predict(model_cnn, args.out, args.gpu),
                   trigger=(frequency, 'epoch'))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # Save the model
    chainer.serializers.save_npz(args.out + '/' + g_model_filename, model_cnn)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--init')
    parser.add_argument('--resume')
    parser.add_argument('--random_board', type=float, default=0)
    parser.add_argument('--boardnorm', action='store_true')
    parser.add_argument('--out', default='agent')
    args = parser.parse_args()

    game = G2048()

    def random_action():
        return random.choice(np.nonzero(game.movability)[0])

    model = Agent(CNN())
    if args.init:
        chainer.serializers.load_npz(args.init, model.model)
    else:
        model.model(np.zeros((1, 4, 4)))
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    if args.init:
        optimizer = chainer.optimizers.MomentumSGD(lr=0.01)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))
    else:
        optimizer = chainer.optimizers.Adam(eps=1e-2)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from net import CNN
import ShannonAndBirch

# neural network hyperparameters
batch_size = 128
learning_rate = 1e-3
num_epoches = 40
USE_GPU = torch.cuda.is_available()

datas = ShannonAndBirch.getdata()
dataset = ShannonAndBirch.trainAndtest(datas, datas[41], batch_size)
print(type(dataset[0]))

model = CNN(1, 2)
if USE_GPU:
    model = model.cuda()


def train():
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    for epoch in range(num_epoches):
        print('epoch {}'.format(epoch + 1))
        print('*' * 10)
        running_loss = 0.0
        running_acc = 0.0
        for i, data in enumerate(dataset[0], 1):
                                           shuffle=True, num_workers=4)

# In[4]:

print(train_set.classes)
print(train_set.class_to_idx)
print(train_set.__len__)
print(test_set.classes)
print(test_set.class_to_idx)
print(test_set.__len__)

# In[5]:

cnn = CNN()
print(cnn)

# In[6]:

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
cross_loss = nn.CrossEntropyLoss()  # the target label is not one-hotted
triplet_loss = TripletLoss(0.5)  # choose the loss function
alpha = 0.5

for epoch in range(EPOCH):
    print('EPOCH ' + str(epoch))
    # for step, (b_x, b_y) in enumerate(train_loader):
    for step, (anchor, positive, negative) in enumerate(train_loader):
        # output = cnn(b_x)[0]
        # loss = loss_func(output, b_y)
def main():
    # The parser holds the training/testing settings; a value in `default` is
    # used unless it is changed there or overridden on the command line.
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--model', default='CNN',  # choose the model to train
                        help='CNN or MLP')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    parser.add_argument('--save_dir', default='output/',  # directory for saved models
                        help='dir saved models')
    args = parser.parse_args()

    # torch.cuda.is_available() checks whether a usable GPU exists; otherwise train on the CPU
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    print(use_cuda)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST('./fashionmnist_data/', train=True, download=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307,), (0.3081,))
                              ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST('./fashionmnist_data/', train=False,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307,), (0.3081,))
                              ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    writer = SummaryWriter()  # records training and test information: loss, acc, etc.

    if args.model == 'CNN':
        model = CNN().to(device)  # CNN() or MLP
    if args.model == 'MLP':
        model = MLP().to(device)  # CNN() or MLP
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    # the optimizer keeps a reference to every parameter; each parameter carries its gradient
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[12, 24], gamma=0.1)
    # the learning rate is decayed at the given epoch milestones

    model.train()
    log_loss = 0
    log_acc = 0
    for epoch in range(1, args.epochs + 1):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)  # negative log likelihood loss (nll_loss); sums up the batch cross entropy
            loss.backward()
            optimizer.step()  # update the parameters from their gradients
            # Every args.log_interval batches: print the training status, run a
            # validation pass, and store the validation (test) accuracy in the writer.
            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
                # validation below
                model.eval()
                test_loss = 0
                correct = 0
                with torch.no_grad():  # no gradients needed
                    for data, target in test_loader:
                        data, target = data.to(device), target.to(device)
                        output = model(data)
                        test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
                        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                        correct += pred.eq(target.view_as(pred)).sum().item()

                test_loss /= len(test_loader.dataset)
                writer.add_scalars('loss', {'train_loss': loss, 'val_loss': test_loss},
                                   global_step=log_acc)
                writer.add_scalar('val_accuracy', correct / len(test_loader.dataset),
                                  global_step=log_acc)
                log_acc += 1
                print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
                    test_loss, correct, len(test_loader.dataset),
                    100. * correct / len(test_loader.dataset)))
                model.train()

    if args.save_model:  # save the trained model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        torch.save(model.state_dict(), os.path.join(args.save_dir, args.model + ".pt"))

    writer.add_graph(model, (data,))  # save the model structure as a graph to trace the data flow
    writer.close()
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load the data set
    is_training = True
    training_data = IDDataset(is_training)
    testing_data = IDDataset(not is_training)

    # Build data loader
    data_loader = DataLoader(training_data, batch_size=args.batch_size,
                             shuffle=True, num_workers=args.num_workers,
                             collate_fn=collate_fn)

    # Build the models
    encoder = CNN(args.hidden_size)
    decoder = LSTM(args.embed_size, args.hidden_size, len(vocab), args.num_layers)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + \
        list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (image_batch, id_batch) in enumerate(data_loader):
            # Set mini-batch dataset
            images = to_var(image_batch)
            captions = to_var(id_batch)
            targets = to_var(id_batch[:, 1:])

            # Forward, backward and optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions)
            loss = 0
            id_len = targets.size()[1]
            for j in range(id_len):
                loss += criterion(outputs[:, j, :], targets[:, j]) / id_len
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                      (epoch, args.num_epochs, i, total_step,
                       loss.cpu().data.numpy()))

        # Save the models
        torch.save(decoder.state_dict(),
                   os.path.join(args.model_path,
                                'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
        torch.save(encoder.state_dict(),
                   os.path.join(args.model_path,
                                'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
def main():
    start_time = time()
    args = get_args()
    if args.checkpoint_dir_name:
        dir_name = args.checkpoint_dir_name
    else:
        dir_name = datetime.datetime.now().strftime('%y%m%d%H%M%S')
    path_to_dir = Path(__file__).resolve().parents[1]
    path_to_dir = os.path.join(path_to_dir, *['log', dir_name])
    os.makedirs(path_to_dir, exist_ok=True)

    # tensorboard
    path_to_tensorboard = os.path.join(path_to_dir, 'tensorboard')
    os.makedirs(path_to_tensorboard, exist_ok=True)
    writer = SummaryWriter(path_to_tensorboard)

    # model saving
    os.makedirs(os.path.join(path_to_dir, 'model'), exist_ok=True)
    path_to_model = os.path.join(path_to_dir, *['model', 'model.tar'])

    # csv
    os.makedirs(os.path.join(path_to_dir, 'csv'), exist_ok=True)
    path_to_results_csv = os.path.join(path_to_dir, *['csv', 'results.csv'])
    path_to_args_csv = os.path.join(path_to_dir, *['csv', 'args.csv'])
    if not args.checkpoint_dir_name:
        with open(path_to_args_csv, 'a') as f:
            args_dict = vars(args)
            param_writer = csv.DictWriter(f, list(args_dict.keys()))
            param_writer.writeheader()
            param_writer.writerow(args_dict)

    # logging using hyperdash
    if not args.no_hyperdash:
        from hyperdash import Experiment
        exp = Experiment('Classification task on CIFAR10 dataset with CNN')
        for key in vars(args).keys():
            exec("args.%s = exp.param('%s', args.%s)" % (key, key, key))
    else:
        exp = None

    path_to_dataset = os.path.join(Path(__file__).resolve().parents[2], 'datasets')
    os.makedirs(path_to_dataset, exist_ok=True)
    train_loader, eval_loader, classes = get_loader(
        batch_size=args.batch_size, num_workers=args.num_workers,
        path_to_dataset=path_to_dataset)

    # show some of the training images, for fun
    dataiter = iter(train_loader)
    images, labels = next(dataiter)
    img_grid = torchvision.utils.make_grid(images)
    matplotlib_imshow(img_grid)
    writer.add_image('four_CIFAR10_images', img_grid)

    # define a network, loss function and optimizer
    model = CNN()
    writer.add_graph(model, images)
    model = torch.nn.DataParallel(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    start_epoch = 0

    # resume training
    if args.checkpoint_dir_name:
        print('\nLoading the model...')
        checkpoint = torch.load(path_to_model)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
    summary(model, input_size=(3, 32, 32))
    model.to(args.device)

    # train the network
    print('\n--------------------')
    print('Start training and evaluating the CNN')
    for epoch in range(start_epoch, args.n_epoch):
        start_time_per_epoch = time()
        train_loss, train_acc = train(train_loader, model, criterion, optimizer,
                                      args.device, writer, epoch, classes)
        eval_loss, eval_acc = eval(eval_loader, model, criterion, args.device)
        elapsed_time_per_epoch = time() - start_time_per_epoch
        result_dict = {
            'epoch': epoch,
            'train_loss': train_loss,
            'eval_loss': eval_loss,
            'train_acc': train_acc,
            'eval_acc': eval_acc,
            'elapsed time': elapsed_time_per_epoch
        }
        with open(path_to_results_csv, 'a') as f:
            result_writer = csv.DictWriter(f, list(result_dict.keys()))
            if epoch == 0:
                result_writer.writeheader()
            result_writer.writerow(result_dict)

        # checkpoint
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, path_to_model)
        if exp:
            exp.metric('train loss', train_loss)
            exp.metric('eval loss', eval_loss)
            exp.metric('train acc', train_acc)
            exp.metric('eval acc', eval_acc)
        else:
            print(result_dict)
        writer.add_scalar('loss/train_loss', train_loss, epoch * len(train_loader))
        writer.add_scalar('loss/eval_loss', eval_loss, epoch * len(eval_loader))
        writer.add_scalar('acc/train_acc', train_acc, epoch * len(train_loader))
        writer.add_scalar('acc/eval_acc', eval_acc, epoch * len(eval_loader))

    elapsed_time = time() - start_time
    print('\nFinished Training, elapsed time ===> %f' % elapsed_time)
    if exp:
        exp.end()
    writer.close()
def main():
    parser = argparse.ArgumentParser(description='pytorch example: MNIST')
    parser.add_argument('--batch', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--display', '-d', type=int, default=100,
                        help='Number of interval to show progress')
    args = parser.parse_args()
    batch_size = args.batch
    epoch_size = args.epoch
    display_interval = args.display

    transform = transforms.Compose(
        [transforms.ToTensor(),  # transform to torch.Tensor
         transforms.Normalize(mean=(0.5,), std=(0.5,))])
    trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                          download=True, transform=transform)
    # trainset...
    #   <class 'object'>
    #   <class 'torch.utils.data.dataset.Dataset'>
    #   <class 'torchvision.datasets.mnist.MNIST'>
    # trainset[0][0]...
    #   <class 'object'>
    #   <class 'torch._C._TensorBase'>
    #   <class 'torch.Tensor'> torch.Size([1, 28, 28])
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2)
    testset = torchvision.datasets.MNIST(root='./data', train=False,
                                         download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=2)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('device:', device)

    net = CNN()
    print(net)
    print()
    net.to(device)  # for GPU

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    epoch_list = []
    train_acc_list = []
    test_acc_list = []
    for epoch in range(epoch_size):  # loop over the dataset multiple times
        running_loss = 0.0
        train_true = []
        train_pred = []
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            # inputs...
            #   <class 'object'>
            #   <class 'torch._C._TensorBase'>
            #   <class 'torch.Tensor'> torch.Size([100, 1, 28, 28])
            train_true.extend(labels.tolist())
            inputs, labels = inputs.to(device), labels.to(device)  # for GPU

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            train_pred.extend(predicted.tolist())

            # print statistics
            running_loss += loss.item()
            if i % display_interval == display_interval - 1:  # print every 100 mini-batches
                print('[epochs: {}, mini-batches: {}, images: {}] loss: {:.3f}'.format(
                    epoch + 1, i + 1, (i + 1) * batch_size,
                    running_loss / display_interval))
                running_loss = 0.0

        test_true = []
        test_pred = []
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                test_true.extend(labels.tolist())
                images, labels = images.to(device), labels.to(device)  # for GPU
                outputs = net(images)
                _, predicted = torch.max(outputs.data, 1)
                test_pred.extend(predicted.tolist())

        train_acc = accuracy_score(train_true, train_pred)
        test_acc = accuracy_score(test_true, test_pred)
        print('  epoch: {}, train acc.: {:.3f}, test acc.: {:.3f}'.format(
            epoch + 1, train_acc, test_acc))
        print()
        epoch_list.append(epoch + 1)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

    print('Finished Training')
    print('Save Network')
    torch.save(net.state_dict(), 'model.pth')

    df = pd.DataFrame({'epoch': epoch_list,
                       'train/accuracy': train_acc_list,
                       'test/accuracy': test_acc_list})
    print('Save Training Log')
    df.to_csv('train.log', index=False)
import torch
from torch.distributions.categorical import Categorical
from rule_base import RuleBase2, RuleBase4, RuleBase7, RuleBase6
from net import CNN, xCNN
import argparse

parser = argparse.ArgumentParser(description='Starcraft 2 deep RL agents')

mapdict = dict()
mapdict['CollectMineralShards'] = RuleBase2
mapdict['DefeatRoaches'] = RuleBase4
mapdict['CollectMineralsAndGas'] = RuleBase6
mapdict['BuildMarines'] = RuleBase7

parser.add_argument('--map', type=str, default='CollectMineralShards',
                    help='name of SC2 map')
parser.add_argument('--process_screen', action='store_true',
                    help='process screen and minimap')
args = parser.parse_args()

if args.process_screen:
    net = CNN(348, 1985).cuda()
else:
    net = CNN().cuda()

mapdict[args.map](net, args.map, args.process_screen)
import torch
from torch.distributions.categorical import Categorical
from rule_base import RuleBase, RuleBasevalue
from net import CNN


def compute_log_prob(probs, actions):
    cate_dist = Categorical(probs)
    log_prob = cate_dist.log_prob(actions).unsqueeze(-1)
    return log_prob


a = 1
net = CNN().cuda()
RuleBasevalue(net)