def __init__(self, device, trainData, validData, args):
    """Set up twin classifiers initialised from the same pre-trained checkpoint.

    ``fadding_model`` is the trainable copy; ``fixed_model`` keeps the
    original weights for comparison. Only the fading copy gets an optimizer.

    Args:
        device: torch device the models are moved to.
        trainData: training dataset consumed by the training loop.
        validData: validation dataset consumed by the training loop.
        args: parsed CLI namespace (uses hidden_size, batch_size, arch).
    """
    self.device = device
    self.history = {'train': [], 'valid': []}
    self.trainData = trainData
    self.validData = validData
    # Load the shared checkpoint once instead of reading it from disk twice.
    # NOTE(review): path is hard-coded — assumes the working directory holds
    # model0.33/; confirm against the launch script.
    state_dict = torch.load("model0.33/model.pkl.904")
    self.fadding_model = SimpleNet(input_size=9,
                                   output_size=12,
                                   hidden_size=args.hidden_size).to(device)
    self.fadding_model.load_state_dict(state_dict)
    self.fixed_model = SimpleNet(input_size=9,
                                 output_size=12,
                                 hidden_size=args.hidden_size).to(device)
    self.fixed_model.load_state_dict(state_dict)
    self.criteria = torch.nn.MSELoss()
    # Only the fading model is optimised; the fixed copy stays frozen.
    self.opt = torch.optim.AdamW(self.fadding_model.parameters(),
                                 lr=8e-5,
                                 weight_decay=9e-3)
    # self.scheduler = scheduler = torch.optim.lr_scheduler.StepLR(self.opt, step_size=200, gamma=args.step_lr)
    self.batch_size = args.batch_size
    self.model_dir = args.arch
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(self.model_dir, exist_ok=True)
    self.best_val = 0.0
def __init__(self, trainData, validData, hidden_size, device, model_dir="model"):
    """Set up two classifiers trained side by side.

    ``classficationA`` sees 8 input features (missing feature dropped),
    ``classficationB`` sees all 9; each gets its own Adam optimizer.

    Args:
        trainData: training dataset consumed by the training loop.
        validData: validation dataset consumed by the training loop.
        hidden_size: hidden-layer width for both SimpleNets.
        device: torch device the models are moved to.
        model_dir: directory where history/checkpoints are written.
    """
    self.history = {'train': [], 'valid': []}
    self.trainData = trainData
    self.validData = validData
    self.classficationA = SimpleNet(input_size=8,
                                    output_size=12,
                                    hidden_size=hidden_size).to(device)
    self.classficationB = SimpleNet(input_size=9,
                                    output_size=12,
                                    hidden_size=hidden_size).to(device)
    self.criterion = nn.CrossEntropyLoss()
    self.mse_loss = nn.MSELoss()
    self.opt_C_A = torch.optim.Adam(self.classficationA.parameters(), lr=1e-4)
    self.opt_C_B = torch.optim.Adam(self.classficationB.parameters(), lr=1e-4)
    self.device = device
    self.model_dir = model_dir
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(self.model_dir, exist_ok=True)
    self.best_val = 0.0
def __init__(self, trainData, validData, hidden_size, device, model_dir="model"):
    """Set up the GAN triple: generator, discriminator and classifier.

    The generator imputes the single missing feature from the 8 observed
    ones; the discriminator judges real vs generated values; the classifier
    consumes all 9 features. Each network gets its own Adam optimizer.

    Args:
        trainData: training dataset consumed by the training loop.
        validData: validation dataset consumed by the training loop.
        hidden_size: hidden-layer width for all three networks.
        device: torch device the models are moved to.
        model_dir: directory where history/checkpoints are written.
    """
    self.history = {'train': [], 'valid': []}
    self.trainData = trainData
    self.validData = validData
    self.generator = Generator(input_size=8,
                               output_size=1,
                               hidden_size=hidden_size).to(device)
    self.discriminator = Discriminator(input_size=1,
                                       output_size=1,
                                       hidden_size=hidden_size).to(device)
    self.classfication = SimpleNet(input_size=9,
                                   output_size=12,
                                   hidden_size=hidden_size).to(device)
    self.adversarial_loss = nn.BCEWithLogitsLoss()
    self.criterion = nn.CrossEntropyLoss()
    self.opt_G = torch.optim.Adam(self.generator.parameters(), lr=1e-4)
    self.opt_D = torch.optim.Adam(self.discriminator.parameters(), lr=1e-4)
    self.opt_C = torch.optim.Adam(self.classfication.parameters(), lr=1e-4)
    self.device = device
    self.model_dir = model_dir
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(self.model_dir, exist_ok=True)
    self.best_val = 0.0
def __init__(self, device, trainData, validData, args):
    """Set up a single SimpleNet classifier with optimizer and LR schedule.

    Args:
        device: torch device the model is moved to.
        trainData: training dataset consumed by the training loop.
        validData: validation dataset consumed by the training loop.
        args: parsed CLI namespace (uses hidden_size, lr, step_lr,
            batch_size, arch).
    """
    self.device = device
    self.history = {'train': [], 'valid': []}
    self.trainData = trainData
    self.validData = validData
    self.model = SimpleNet(input_size=9,
                           output_size=12,
                           hidden_size=args.hidden_size).to(device)
    self.criteria = torch.nn.CrossEntropyLoss()
    self.opt = torch.optim.AdamW(self.model.parameters(),
                                 lr=args.lr,
                                 weight_decay=3.3e-1)
    # Drop the redundant chained local (`self.scheduler = scheduler = ...`):
    # the throwaway name served no purpose.
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.opt,
                                                     step_size=200,
                                                     gamma=args.step_lr)
    self.batch_size = args.batch_size
    self.model_dir = args.arch
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(self.model_dir, exist_ok=True)
    self.best_val = 0.0
def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory.

    Reads ``model_info.pth`` for the constructor arguments, rebuilds the
    SimpleNet, loads ``model.pth`` into it and returns the model on the
    available device in eval mode.
    """
    print("Loading model.")
    # Determine the device up front so that a checkpoint saved on GPU can be
    # re-mapped when only a CPU is available (otherwise torch.load raises).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # First, load the parameters used to create the model.
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)
    print("model_info: {}".format(model_info))
    model = SimpleNet(model_info['input_dim'], model_info['hidden_dim'],
                      model_info['num_hidden'], model_info['output_dim'])
    # Load the stored model parameters onto the chosen device.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f, map_location=device))
    # prep for testing
    model.to(device).eval()
    print("Done loading model.")
    return model
def model_fn(model_dir):
    """Load the PyTorch model from `model_dir` and return it ready for inference.

    Reads ``model_info.pth`` for the constructor arguments, rebuilds the
    SimpleNet and loads ``model.pth`` into it.
    """
    logger.info("Loading model.")
    # Determine the device up front so that a checkpoint saved on GPU can be
    # re-mapped when only a CPU is available (otherwise torch.load raises).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # First, load the parameters used to create the model.
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)
    logger.info("model_info: {}".format(model_info))
    model = SimpleNet(model_info['input_dim'], model_info['hidden_dim'],
                      model_info['output_dim'])
    # Load the stored model parameters onto the chosen device.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f, map_location=device))
    # eval() for inference, consistent with the other model_fn in this project.
    return model.to(device).eval()
def train(self, batch_size, epochs):
    """Preprocess the held MNIST arrays, build a SimpleNet and train it.

    Mutates ``self.x_train/x_test`` (reshape, float32, /255) and
    ``self.y_train/y_test`` (one-hot), then fits and evaluates the model.

    Args:
        batch_size: mini-batch size passed to ``model.fit``.
        epochs: number of training epochs.
    """
    num_classes = 10
    if self.isLogEnabled:
        print('Trainning MNIST!')
    # Keras backends differ on channel placement; reshape accordingly.
    if K.image_data_format() == 'channels_first':
        self.x_train = self.x_train.reshape(self.x_train.shape[0], 1,
                                            self.img_rows, self.img_cols)
        self.x_test = self.x_test.reshape(self.x_test.shape[0], 1,
                                          self.img_rows, self.img_cols)
        input_shape = (1, self.img_rows, self.img_cols)
    else:
        self.x_train = self.x_train.reshape(self.x_train.shape[0],
                                            self.img_rows, self.img_cols, 1)
        self.x_test = self.x_test.reshape(self.x_test.shape[0],
                                          self.img_rows, self.img_cols, 1)
        input_shape = (self.img_rows, self.img_cols, 1)
    # Scale pixel values from [0, 255] into [0, 1].
    self.x_train = self.x_train.astype('float32')
    self.x_test = self.x_test.astype('float32')
    self.x_train /= 255
    self.x_test /= 255
    if self.isLogEnabled:
        print('x_train shape:', self.x_train.shape)
        print(self.x_train.shape[0], 'train samples')
        print(self.x_test.shape[0], 'test samples')
    # convert class vectors to binary class matrices
    self.y_train = keras.utils.to_categorical(self.y_train, num_classes)
    self.y_test = keras.utils.to_categorical(self.y_test, num_classes)
    self.model = SimpleNet.build(num_classes, input_shape)
    self.model.compile(loss=keras.losses.categorical_crossentropy,
                       optimizer=keras.optimizers.Adadelta(),
                       metrics=['accuracy'])
    # The test split doubles as validation data during fitting.
    self.model.fit(self.x_train, self.y_train,
                   batch_size=batch_size,
                   epochs=epochs,
                   verbose=1,
                   validation_data=(self.x_test, self.y_test))
    score = self.model.evaluate(self.x_test, self.y_test, verbose=0)
    if self.isLogEnabled:
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
def main():
    """Entry point: build the actor-critic CNN and dispatch the comparison run.

    Populates the module-level ``actor_critic``, ``directory`` and ``weight``
    globals, then runs the comparison selected by ``args.comp``.
    """
    global actor_critic, directory, weight
    # All route/wavelength selection combinations, plus the "choose nothing" action.
    num_cls = args.wave_num * args.k + 1
    if args.append_route.startswith("True"):
        channel_num = args.wave_num + args.k
    else:
        channel_num = args.wave_num
    # Parse the weight argument ("None" string means no weight).
    if args.weight.startswith('None'):
        weight = None
    else:
        weight = args.weight
    # In CNN learning mode the observation shape should be CHW.
    assert args.mode.startswith('learning')
    # Model initialisation: pick the CNN backbone named by args.cnn.
    if args.cnn.startswith('mobilenetv2'):
        actor_critic = MobileNetV2(in_channels=channel_num,
                                   num_classes=num_cls, t=6)
    elif args.cnn.startswith('simplenet'):
        actor_critic = SimpleNet(in_channels=channel_num, num_classes=num_cls)
    elif args.cnn.startswith('simplestnet'):
        actor_critic = SimplestNet(in_channels=channel_num,
                                   num_classes=num_cls)
    elif args.cnn.startswith('alexnet'):
        actor_critic = AlexNet(in_channels=channel_num, num_classes=num_cls)
    elif args.cnn.startswith('squeezenet'):
        actor_critic = SqueezeNet(in_channels=channel_num,
                                  num_classes=num_cls, version=1.0)
    else:
        raise NotImplementedError
    times = 1  # number of repetitions
    prefix = "trained_models"
    directory = os.path.join(prefix, 'a2c', args.cnn, args.step_over)
    # Dispatch on the requested comparison type.
    if args.comp.startswith("states"):
        all_states_comp()
    elif args.comp.startswith("random"):
        random_comp(times=times)
    elif args.comp.startswith("None"):
        raise ValueError("Wrong call for this script")
    else:
        raise NotImplementedError
hidden_layer = 256 # hidden layer dimension batch_size = 4 # load data train_data = EmotionDataset(root_dir='./train_test', dataset='train') train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True) train_data_pytorch = torch.from_numpy(train_data.data).float().to(device) train_label = train_data.labels.squeeze() test_set = sio.loadmat(os.path.join(root_dir, 'test_data.mat'))['test_data'] test_label = (sio.loadmat(os.path.join(root_dir, 'test_label.mat'))['test_label'] + 1).squeeze() test_data_pytorch = torch.from_numpy(test_set).float().to(device) print("data load finished.") # instantiate model model = SimpleNet(310, hidden_layer, 3).to(device) # training process def train(): criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=lr) if DRAW: loss_data = [] acc_data = [] start = time.time() for ep in range(max_epoch_num): # train model.train()
increases in computing performance by harnessing the power of the graphics processing unit (GPU). """ # set the seed for generating random numbers torch.manual_seed(args.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(args.seed) # get train loader train_loader = _get_train_loader(args.batch_size, args.data_dir) # data_dir from above.. ## TODO: Build the model by passing in the input params # To get params from the parser, call args.argument_name, ex. args.epochs or ards.hidden_dim # Don't forget to move your model .to(device) to move to GPU , if appropriate model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(device) # Instantiate the model # Given: save the parameters used to construct the model save_model_params(model, args.model_dir) ## TODO: Define an optimizer and loss function for training optimizer = optim.Adam( model.parameters(), lr=args.lr) # You can use stochastic gradient descent instead of Adam criterion = nn.BCELoss( ) # BCELoss() returns one value. Cannot use torch.nn.CrossEntropyLoss() because returns several values # Trains the model (given line of code, which calls the above training function) # This function *also* saves the model state dictionary
transforms.Normalize((0.1307,), (0.3081,)) ])) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True) test_dataset = datasets.MNIST(root='data', train=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False) """ train_loader = torch.utils.data.DataLoader(dataset('train'), batch_size=64, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset('test'), batch_size=64) # net = SimpleNet_Bin(10) net = SimpleNet(10) net.cuda() optimizer = optim.Adam(net.parameters(), lr=1e-2, weight_decay=1e-6, betas=(0.9, 0.999)) criterion = nn.CrossEntropyLoss().cuda() criterion_test = nn.CrossEntropyLoss(reduction='sum').cuda() log_path = 'logs/bin' writer = SummaryWriter(log_dir=log_path) epoch_num = 20 lr0 = 1e-4 for epoch in range(epoch_num): current_lr = lr0 / 2**int(epoch / 4)
help='level of verbosity (default: 0)') args = parser.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # set the seed for generating random numbers torch.manual_seed(args.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(args.seed) # get train loader train_loader = _get_train_loader(args.batch_size, args.data_dir) # data_dir from above.. model = SimpleNet(args.input_dim, args.hidden_dim, args.num_hidden, args.output_dim) if args.verbosity: header('Model Architecture:') print(model) optimizer = optim.SGD(model.parameters(), lr=args.lr) criterion = nn.BCELoss(reduction='mean') if args.verbosity: header('Starting model training...') train(model, train_loader, args.epochs, optimizer, criterion, device) # Given: save the parameters used to construct the model save_model_params(model, args.model_dir)
def main():
    """Train a SimpleNet regressor with TensorFlow 1.x and log to TensorBoard.

    Loads config.json, builds the graph (placeholders, network, loss, RMSE,
    optimizer, summaries), then runs the train loop inside a tf.Session,
    periodically writing summaries and printing metrics.
    """
    logger = logging.getLogger(__name__)
    ## Load config file
    with open("config.json", "r") as f:
        config = json.load(f)
    ## Cleaning TensorBoard events
    clean_events(config)
    ## Load data
    data_loader = DataLoader(config)
    X_train, X_test, y_train, y_test = data_loader.get_data()
    ## Create placeholders
    # NOTE(review): 13 features suggests the Boston-housing-style input — confirm.
    X = tf.placeholder(tf.float64, [None, 13])
    # y = tf.placeholder(tf.float32, [None, 2])
    y = tf.placeholder(tf.float64, [None])
    ## Create model and outputs
    net = SimpleNet(config)
    net_output = net.forward(X)
    # Channel 0 is the prediction, channel 1 the predicted log-variance.
    y_pred, log_sigma = net_output[..., 0], net_output[..., 1]
    # Track mean of log_sigma across batch of data
    tf.summary.scalar("mean_log_sigma", tf.reduce_mean(log_sigma))
    ## Define metrics based on experiment
    # Loss: chosen by the first two tokens of the experiment name.
    type_exp = '_'.join(config['exp_name'].split('_')[:2])
    if type_exp == 'vanilla_loss':
        loss = compute_loss(y_true=y, y_pred=y_pred)
    elif type_exp == 'loss_bnn':
        loss = compute_loss_bnn(y_true=y, y_pred=y_pred, log_sigma=log_sigma)
    # Root Mean Squared Error (RMSE)
    rmse = compute_rmse(y_true=y, y_pred=y_pred)
    ## Define optimizer
    optimizer = net.train_optimizer(loss)
    ## Merging all summaries
    merged_summary = tf.summary.merge_all()
    ## Launching the execution graph for training
    with tf.Session() as sess:
        # Initializing all variables
        sess.run(tf.global_variables_initializer())
        # Create train and test writer
        train_writer = tf.summary.FileWriter("./tensorboard/" +
                                             config["exp_name"] + "/train/")
        test_writer = tf.summary.FileWriter("./tensorboard/" +
                                            config["exp_name"] + "/test/")
        # Visualizing the Graph
        train_writer.add_graph(sess.graph)
        for epoch in range(config["trainer"]["num_epochs"]):
            for batch in range(config["trainer"]["num_iter_per_epoch"]):
                # Yield next batch of data
                batch_X, batch_y = next(
                    data_loader.get_next_batch(
                        config["trainer"]["batch_size"]))
                # Run the optimizer
                sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
                # Compute train loss and rmse
                train_loss, train_rmse = sess.run([loss, rmse],
                                                  feed_dict={
                                                      X: batch_X,
                                                      y: batch_y
                                                  })
            # NOTE(review): block nesting reconstructed from a collapsed
            # source — summaries are written once per writer_step epochs,
            # using the metrics of the last batch; confirm against history.
            if (epoch % config["trainer"]["writer_step"] == 0):
                # Run the merged summary and write it to disk
                s = sess.run(merged_summary,
                             feed_dict={
                                 X: batch_X,
                                 y: batch_y
                             })
                train_writer.add_summary(s, (epoch + 1))
                # Evaluate test data
                test_loss, test_rmse = sess.run([loss, rmse],
                                                feed_dict={
                                                    X: X_test,
                                                    y: y_test
                                                })
                s = sess.run(merged_summary, feed_dict={X: X_test, y: y_test})
                test_writer.add_summary(s, (epoch + 1))
            if (epoch % config["trainer"]["display_step"] == 0):
                print("Epoch: {:03d},".format(epoch + 1), \
                      "train_loss= {:03f},".format(train_loss), \
                      "train_rmse= {:03f},".format(train_rmse), \
                      "test_loss= {:03f},".format(test_loss), \
                      "test_rmse={:03f}".format(test_rmse)
                      )
    print("Training complete")
def main():
    """
    Main program: build the actor-critic CNN, then either evaluate saved
    checkpoints or run the A2C/PPO training loop over SubprocEnv workers.
    :return:
    """
    # All route/wavelength selection combinations, plus "choose nothing".
    num_cls = args.wave_num * args.k + 1
    action_shape = 1  # dimensionality of a single action; defaults to 1
    # Total number of gradient updates to perform.
    num_updates = int(
        args.steps) // args.workers // args.num_steps
    if args.append_route.startswith("True"):
        channel_num = args.wave_num + args.k
    else:
        channel_num = args.wave_num
    # Parse the weight argument ("None" string means no weight).
    if args.weight.startswith('None'):
        weight = None
    else:
        weight = args.weight
    # Build the actor-critic network.
    if args.mode.startswith('alg'):
        # ksp(args, weight)
        return
    elif args.mode.startswith('learning'):
        # In CNN learning mode the observation shape should be CHW.
        obs_shape = (channel_num, args.img_height, args.img_width)
        if args.cnn.startswith('mobilenetv2'):
            actor_critic = MobileNetV2(in_channels=channel_num,
                                       num_classes=num_cls, t=6)
        elif args.cnn.startswith('simplenet'):
            actor_critic = SimpleNet(in_channels=channel_num,
                                     num_classes=num_cls)
        elif args.cnn.startswith('simplestnet'):
            actor_critic = SimplestNet(in_channels=channel_num,
                                       num_classes=num_cls)
        elif args.cnn.startswith('alexnet'):
            actor_critic = AlexNet(in_channels=channel_num,
                                   num_classes=num_cls)
        elif args.cnn.startswith('squeezenet'):
            actor_critic = SqueezeNet(in_channels=channel_num,
                                      num_classes=num_cls, version=1.0)
        elif args.cnn.startswith('expandsimplenet'):
            actor_critic = ExpandSimpleNet(in_channels=channel_num,
                                           num_classes=num_cls,
                                           expand_factor=args.expand_factor)
        elif args.cnn.startswith('deepersimplenet'):
            actor_critic = DeeperSimpleNet(in_channels=channel_num,
                                           num_classes=num_cls,
                                           expand_factor=args.expand_factor)
        else:
            raise NotImplementedError
        # Create the optimizer matching the chosen algorithm.
        if args.algo.startswith("a2c"):
            optimizer = optim.RMSprop(actor_critic.parameters(),
                                      lr=args.base_lr,
                                      eps=args.epsilon,
                                      alpha=args.alpha)
        elif args.algo.startswith("ppo"):
            optimizer = optim.Adam(actor_critic.parameters(),
                                   lr=args.base_lr,
                                   eps=args.epsilon)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError
    if args.cuda.startswith("True"):
        # Use CUDA for computation if requested.
        actor_critic.cuda()
        # actor_critic = DistModule(actor_critic)
    # Check whether we are in evaluation mode.
    if args.evaluate:
        print("evaluate mode")
        models = {}
        times = 1
        prefix = "trained_models"
        directory = os.path.join(prefix, 'a2c', args.cnn, args.step_over)
        env = RwaGame(net_config=args.net,
                      wave_num=args.wave_num,
                      rou=args.rou,
                      miu=args.miu,
                      max_iter=args.max_iter,
                      k=args.k,
                      mode=args.mode,
                      img_width=args.img_width,
                      img_height=args.img_height,
                      weight=weight,
                      step_over=args.step_over)
        # Checkpoint files are named "<update_i>.tar"; evaluate newest first.
        for model_file in reversed(
                sorted(os.listdir(directory),
                       key=lambda item: int(item.split('.')[0]))):
            model_file = os.path.join(directory, model_file)
            print("evaluate model {}".format(model_file))
            params = torch.load(model_file)
            actor_critic.load_state_dict(params['state_dict'])
            actor_critic.eval()
            models[params['update_i']] = {}
            print("model loading is finished")
            for t in range(times):
                total_reward, total_services, allocated_services = 0, 0, 0
                obs, reward, done, info = env.reset()
                while not done:
                    # volatile=True disables gradient tracking (legacy API).
                    inp = Variable(torch.Tensor(obs).unsqueeze(0),
                                   volatile=True)
                    value, action, action_log_prob = actor_critic.act(
                        inputs=inp, deterministic=True)  # deterministic policy
                    action = action.data.numpy()[0]
                    obs, reward, done, info = env.step(action=action[0])
                    total_reward += reward
                    if reward == ARRIVAL_NEWPORT or reward == ARRIVAL_NOPORT:
                        allocated_services += 1
                    if args.step_over.startswith('one_time'):
                        if info:
                            total_services += 1
                    elif args.step_over.startswith('one_service'):
                        total_services += 1
                    else:
                        raise NotImplementedError
                models[params['update_i']]['time'] = t
                models[params['update_i']]['reward'] = total_reward
                models[params['update_i']]['total_services'] = total_services
                models[params['update_i']][
                    'allocated_services'] = allocated_services
                # Blocking probability = rejected / total arrivals.
                models[params['update_i']]['bp'] = (
                    total_services - allocated_services) / total_services
            # Print the simulation results as a markdown table row.
            # print("|updated model|test index|reward|bp|total services|allocated services|")
            # print("|:-----|:-----|:-----|:-----|:-----|:-----|")
            # for m in sorted(models):
            for i in range(times):
                print("|{up}|{id}|{r}|{bp:.4f}|{ts}|{als}|".format(
                    up=params['update_i'],
                    id=models[params['update_i']]['time'],
                    r=models[params['update_i']]['reward'],
                    bp=models[params['update_i']]['bp'],
                    ts=models[params['update_i']]['total_services'],
                    als=models[params['update_i']]['allocated_services']))
        return
    # Create the game environments (one per worker process).
    envs = [
        make_env(net_config=args.net,
                 wave_num=args.wave_num,
                 k=args.k,
                 mode=args.mode,
                 img_width=args.img_width,
                 img_height=args.img_height,
                 weight=weight,
                 step_over=args.step_over) for _ in range(args.workers)
    ]
    envs = SubprocEnv(envs)
    # Container that stores and updates rollout variables during play.
    rollout = RolloutStorage(num_steps=args.num_steps,
                             num_processes=args.workers,
                             obs_shape=obs_shape,
                             action_shape=action_shape)
    current_obs = torch.zeros(args.workers, *obs_shape)
    observation, _, _, _ = envs.reset()
    update_current_obs(current_obs, observation, channel_num)
    rollout.observations[0].copy_(current_obs)
    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([args.workers, 1])
    final_rewards = torch.zeros([args.workers, 1])
    if args.cuda.startswith("True"):
        current_obs = current_obs.cuda()
        rollout.cuda()
    start = time.time()
    log_start = time.time()
    total_services = 0  # service arrivals during the current log_interval
    allocated_services = 0  # services successfully allocated during the interval
    update_begin = 0
    # Check whether we are resuming an earlier training run.
    if args.resume:
        pms = torch.load(args.resume)
        actor_critic.load_state_dict(pms['state_dict'])
        optimizer.load_state_dict(pms['optimizer'])
        update_begin = pms['update_i']
        print("resume process from update_i {}, with base_lr {}".format(
            update_begin, args.base_lr))
    for updata_i in range(update_begin, num_updates):
        update_start = time.time()
        for step in range(args.num_steps):
            # Choose an action (volatile=True disables gradient tracking).
            inp = Variable(rollout.observations[step], volatile=True)
            value, action, action_log_prob = actor_critic.act(
                inputs=inp, deterministic=False)
            # print(action)
            # Squeeze and move to CPU before stepping the envs. Without a GPU
            # this is a no-op, kept as a reminder.
            cpu_actions = action.data.squeeze(1).cpu().numpy()
            # Observe the current and next observations.
            envs.step_async(cpu_actions)
            obs, reward, done, info = envs.step_wait(
            )  # reward and done are (n,) numpy.ndarray vectors
            # if reward == ARRIVAL_NEWPORT_NEWPORT or reward == ARRIVAL_NOPORT_NEWPORT or reward == ARRIVAL_NOPORT_NOPORT:
            #     allocated_services += 1
            print(reward)
            for i in reward:
                if i == ARRIVAL_NEWPORT or i == ARRIVAL_NOPORT:
                    allocated_services += 1
            # allocated_services += (reward==ARRIVAL_NEWPORT_NEWPORT or reward==ARRIVAL_NOPORT_NEWPORT or reward==ARRIVAL_NOPORT_NOPORT).any().sum()
            # Count successful-allocation rewards. TODO: unresolved.
            if args.step_over.startswith('one_service'):
                # Count how many service-arrival events this step contains.
                total_services += (info == True).sum()
            # elif args.step_over.startswith('one_service'):
            #     total_services += args.workers
            else:
                raise NotImplementedError
            reward = torch.from_numpy(np.expand_dims(reward, 1)).float()
            episode_rewards += reward  # accumulate reward
            # When a game finishes, restart the episode/final reward
            # accumulation, seeding it with the returned reward.
            masks = torch.FloatTensor([[0.0] if d else [1.0] for d in done
                                       ])  # True --> 0, False --> 1
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks
            # if done[len(done)-1]:
            #     print('final port count at game over:', envs.get_all_edges_port())
            if args.cuda.startswith("True"):
                masks = masks.cuda()
            # Unsqueeze masks twice and multiply with current_obs: finished
            # envs get an all-zero (black) observation, i.e. the game-over frame.
            current_obs *= masks.unsqueeze(2).unsqueeze(2)
            update_current_obs(current_obs=current_obs,
                               obs=obs,
                               channel_num=channel_num)
            # Store this step's results.
            rollout.insert(step=step,
                           current_obs=current_obs,
                           action=action.data,
                           action_log_prob=action_log_prob.data,
                           value_pred=value.data,
                           reward=reward,
                           mask=masks)
        # TODO: forced stop
        # envs.close()
        # return
        # Do not reuse variables defined in the for loop above; be careful
        # with the naming and use of the variables below.
        next_inp = Variable(rollout.observations[-1], volatile=True)
        next_value = actor_critic(next_inp)[0].data  # value of the next step
        rollout.compute_returns(next_value=next_value,
                                use_gae=False,
                                gamma=args.gamma,
                                tau=None)
        if args.algo.startswith('a2c'):
            # A2C gradient update.
            inps = Variable(rollout.observations[:-1].view(-1, *obs_shape))
            acts = Variable(rollout.actions.view(-1, action_shape))
            # print("a2cs's acts size is {}".format(acts.size()))
            value, action_log_probs, cls_entropy = actor_critic.evaluate_actions(
                inputs=inps, actions=acts)
            print(cls_entropy.data)
            # print("inputs' shape is {}".format(inps.size()))
            # print("value's shape is {}".format(value.size()))
            value = value.view(args.num_steps, args.workers, 1)
            # print("action_log_probs's shape is {}".format(action_log_probs.size()))
            action_log_probs = action_log_probs.view(args.num_steps,
                                                     args.workers, 1)
            # Compute the losses.
            advantages = Variable(rollout.returns[:-1]) - value
            value_loss = advantages.pow(2).mean()  # L2Loss or MSE Loss
            action_loss = -(Variable(advantages.data) *
                            action_log_probs).mean()
            total_loss = value_loss * args.value_loss_coef + action_loss - cls_entropy * args.entropy_coef
            optimizer.zero_grad()
            total_loss.backward()
            # Gradient clipping (https://www.cnblogs.com/lindaxin/p/7998196.html)
            nn.utils.clip_grad_norm(actor_critic.parameters(),
                                    args.max_grad_norm)
            # average_gradients(actor_critic)
            optimizer.step()
        elif args.algo.startswith('ppo'):
            # PPO gradient update.
            advantages = rollout.returns[:-1] - rollout.value_preds[:-1]
            advantages = (advantages - advantages.mean()) / (advantages.std() +
                                                             1e-5)
            for e in range(args.ppo_epoch):
                data_generator = rollout.feed_forward_generator(
                    advantages, args.num_mini_batch)
                for sample in data_generator:
                    observations_batch, actions_batch, \
                        return_batch, masks_batch, old_action_log_probs_batch, \
                        adv_targ = sample
                    # Reshape to do in a single forward pass for all steps
                    values, action_log_probs, cls_entropy = actor_critic.evaluate_actions(
                        Variable(observations_batch), Variable(actions_batch))
                    adv_targ = Variable(adv_targ)
                    ratio = torch.exp(action_log_probs -
                                      Variable(old_action_log_probs_batch))
                    surr1 = ratio * adv_targ
                    surr2 = torch.clamp(ratio, 1.0 - args.clip_param,
                                        1.0 + args.clip_param) * adv_targ
                    action_loss = -torch.min(
                        surr1,
                        surr2).mean()  # PPO's pessimistic surrogate (L^CLIP)
                    value_loss = (Variable(return_batch) -
                                  values).pow(2).mean()
        # Post-update bookkeeping.
        rollout.after_update()
        update_time = time.time() - update_start
        print("updates {} finished, cost time {}:{}".format(
            updata_i, update_time // 60, update_time % 60))
        # print("total services is {}".format(total_services))
        # Save the model.
        if updata_i % args.save_interval == 0:
            save_path = os.path.join(args.save_dir, 'a2c')
            save_path = os.path.join(save_path, args.cnn)
            save_path = os.path.join(save_path, args.step_over)
            save_path = os.path.join(save_path, args.parameter)
            if os.path.exists(save_path) and os.path.isdir(save_path):
                pass
            else:
                os.makedirs(save_path)
            save_file = os.path.join(save_path, str(updata_i) + '.tar')
            save_content = {
                'update_i': updata_i,
                'state_dict': actor_critic.state_dict(),
                'optimizer': optimizer.state_dict(),
                'mean_reward': final_rewards.mean()
            }
            torch.save(save_content, save_file)
        # Emit logs.
        if updata_i % args.log_interval == 0:
            end = time.time()
            interval = end - log_start
            remaining_seconds = (num_updates - updata_i -
                                 1) / args.log_interval * interval
            remaining_hours = int(remaining_seconds // 3600)
            remaining_minutes = int((remaining_seconds % 3600) / 60)
            total_num_steps = (updata_i + 1) * args.workers * args.num_steps
            blocked_services = total_services - allocated_services
            bp = blocked_services / total_services
            wave_port_num, total_port_num = envs.get_all_edges_port()
            wave_occ_sum, resource_utilization_rate = envs.get_resourceUtilization(
            )
            print(
                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, \
                entropy {:.5f}, value loss {:.5f}, policy loss {:.8f}, remaining time {}:{}, 阻塞率为{}/{}={}, \
                各个波长端口数量为{}, 总的端口数量为{}, 带宽占用情况为{}, 资源占用率为{}".format(
                    updata_i, total_num_steps,
                    int(total_num_steps / (end - start)),
                    final_rewards.mean(), final_rewards.median(),
                    final_rewards.min(), final_rewards.max(), cls_entropy.data,
                    value_loss.data, action_loss.data, remaining_hours,
                    remaining_minutes, blocked_services, total_services, bp,
                    wave_port_num, total_port_num, wave_occ_sum,
                    resource_utilization_rate))
            # raise NotImplementedError
            total_services = 0
            allocated_services = 0
            log_start = time.time()
    envs.close()
def _train_pred(self, column, test_features, SEED=None):
    '''
    One training session.
    self.train_features are features to be used for training.
    self.train_targets[column] are the answers to the training questions.
    self.val_features are features to be used for validate
    self.val_targets[column] is the answer to validate.

    Returns:
        (y_pred, best_loss): predictions for self.test_features and the best
        validation loss observed.
    '''
    x_train, x_val, y_train, y_val = train_test_split(
        self.features,
        self.targets[column],
        test_size=0.1,
        shuffle=True,
        # stratify=True,
    )
    model = SimpleNet(
        self.num_hidden_layers,
        self.dropout_rate,
        len(x_train.columns),
        self.hidden_size,
        1,
    )
    model.to(self.device)
    train_dataset = TrainDataset(x_train, y_train)
    trainloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=self.batch_size,
        shuffle=True,
    )
    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr=self.learning_rate,
                                  weight_decay=1e-3)
    best_loss = np.inf
    # Fix: initialise before the loop — previously this was only assigned in
    # the improvement branch, so a non-improving first epoch (e.g. NaN
    # validation loss) raised UnboundLocalError on `not_update_epoch += 1`.
    not_update_epoch = 0
    # (Removed an unused tensor built from self.test_features.values here.)
    for epoch in range(1, self.epochs):
        train_loss = train_fn(model, optimizer, nn.BCEWithLogitsLoss(),
                              trainloader, self.device)
        valid_loss = valid_fn(model, nn.BCEWithLogitsLoss(), x_val, y_val,
                              self.device)
        self.logger.info(
            'Epoch:{}, train_loss:{:.5f}, valid_loss:{:.5f}'.format(
                epoch, train_loss, valid_loss))
        if valid_loss < best_loss:
            # Improvement: reset the stale counter and checkpoint the model.
            not_update_epoch = 0
            best_loss = valid_loss
            torch.save(model.state_dict(),
                       'best_model_{}.pth'.format(column))
        else:
            not_update_epoch += 1
        # if early_stopping_epoch == not_update_epoch:
        #     print('early stopping')
        #     torch.save(model.state_dict(), 'best_model_{}.pth'.format(column))
        #     break
    self.score += best_loss
    self.num_add += 1
    self.logger.info("column:{} validation loss {}".format(
        column, best_loss))
    gc.collect()
    y_pred = inference_fn(model, self.test_features, self.device)
    return y_pred, best_loss
class Trainer():
    """GAN-based trainer: a generator imputes one missing feature, a
    discriminator judges it, and a classifier consumes the 8 observed
    features plus the imputed one."""

    def __init__(self, trainData, validData, hidden_size, device, model_dir="model"):
        """Build generator/discriminator/classifier, their losses and
        per-network Adam optimizers; ensure model_dir exists."""
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData
        self.generator = Generator(input_size=8, output_size=1,
                                   hidden_size=hidden_size).to(device)
        self.discriminator = Discriminator(input_size=1, output_size=1,
                                           hidden_size=hidden_size).to(device)
        self.classfication = SimpleNet(input_size=9, output_size=12,
                                       hidden_size=hidden_size).to(device)
        self.adversarial_loss = nn.BCEWithLogitsLoss()
        self.criterion = nn.CrossEntropyLoss()
        self.opt_G = torch.optim.Adam(self.generator.parameters(), lr=1e-4)
        self.opt_D = torch.optim.Adam(self.discriminator.parameters(), lr=1e-4)
        self.opt_C = torch.optim.Adam(self.classfication.parameters(), lr=1e-4)
        self.device = device
        self.model_dir = model_dir
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        self.best_val = 0.0

    def run_epoch(self, epoch, training):
        """Run one epoch over trainData (training=True) or validData.

        Per batch: update G against D's verdict, update D on real vs
        generated features, then update the classifier on the observed
        features concatenated with G's imputation. Validation epochs skip
        all optimizer steps and track the best accuracy seen so far.
        """
        self.generator.train(training)
        self.discriminator.train(training)
        self.classfication.train(training)
        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        dataloader = DataLoader(dataset=dataset,
                                batch_size=256,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description)
        g_loss = 0
        d_loss = 0
        loss = 0
        acc = accuracy()
        for i, (ft, missing_ft, labels) in trange:
            ft = ft.to(self.device)
            missing_ft = missing_ft.to(self.device)
            labels = labels.to(self.device)
            batch_size = ft.shape[0]
            # Adversarial ground-truth labels: 1 = real, 0 = generated.
            true = Variable(torch.FloatTensor(batch_size, 1).fill_(1.0),
                            requires_grad=False).to(self.device)  # (batch, 1)
            fake = Variable(torch.FloatTensor(batch_size, 1).fill_(0.0),
                            requires_grad=False).to(self.device)  # (batch, 1)
            # -----------------
            #  Train Generator
            # -----------------
            # G tries to make D label its imputation as real.
            gen_missing = self.generator(ft.detach())
            validity = self.discriminator(gen_missing)
            batch_g_loss = self.adversarial_loss(validity, true)
            if training:
                self.opt_G.zero_grad()
                batch_g_loss.backward()
                self.opt_G.step()
            g_loss += batch_g_loss.item()
            # ---------------------
            #  Train Discriminator
            # ---------------------
            # D is trained on the real missing feature vs a fresh generation.
            real_pred = self.discriminator(missing_ft)
            d_real_loss = self.adversarial_loss(real_pred, true)
            fake_missing = self.generator(ft.detach())
            fake_pred = self.discriminator(fake_missing)
            d_fake_loss = self.adversarial_loss(fake_pred, fake)
            batch_d_loss = (d_real_loss + d_fake_loss) / 2
            if training:
                self.opt_D.zero_grad()
                batch_d_loss.backward()
                self.opt_D.step()
            d_loss += batch_d_loss.item()
            # ------------------
            #  Train Classifier
            # ------------------
            # Classifier input: observed features + newly generated imputation.
            gen_missing = self.generator(ft.detach())
            all_features = torch.cat((ft, gen_missing), dim=1)
            o_labels = self.classfication(all_features)
            batch_loss = self.criterion(o_labels, labels)
            if training:
                self.opt_C.zero_grad()
                batch_loss.backward()
                self.opt_C.step()
            loss += batch_loss.item()
            acc.update(o_labels, labels)
            trange.set_postfix(acc=acc.print_score(),
                               g_loss=g_loss / (i + 1),
                               d_loss=d_loss / (i + 1),
                               loss=loss / (i + 1))
        if training:
            self.history['train'].append({
                'acc': acc.get_score(),
                'g_loss': g_loss / len(trange),
                'd_loss': d_loss / len(trange),
                'loss': loss / len(trange)
            })
            self.save_hist()
        else:
            self.history['valid'].append({
                'acc': acc.get_score(),
                'g_loss': g_loss / len(trange),
                'd_loss': d_loss / len(trange),
                'loss': loss / len(trange)
            })
            self.save_hist()
            # Checkpoint all three networks whenever validation accuracy improves.
            if self.best_val < acc.get_score():
                self.best_val = acc.get_score()
                self.save_best(epoch)

    def save_best(self, epoch):
        """Save all three networks' state dicts as model.pkl.<epoch>."""
        torch.save(
            {
                'cls': self.classfication.state_dict(),
                'generator': self.generator.state_dict(),
                'discriminator': self.discriminator.state_dict()
            }, self.model_dir + '/model.pkl.' + str(epoch))

    def save_hist(self):
        """Dump the accumulated train/valid history to history.json."""
        with open(self.model_dir + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # set the seed for generating random numbers torch.manual_seed(args.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(args.seed) # get train loader train_loader = _get_train_loader(args.batch_size, args.data_dir) # data_dir from above.. ## TODO: Build the model by passing in the input params # To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim # Don't forget to move your model .to(device) to move to GPU , if appropriate model = SimpleNet(input_dim=args.input_dim, hidden_dim=args.hidden_dim, output_dim=args.output_dim) model.to(device) # Given: save the parameters used to construct the model save_model_params(model, args.model_dir) ## TODO: Define an optimizer and loss function for training optimizer = optim.Adam(model.parameters(), lr=args.lr) criterion = nn.BCELoss() # Trains the model (given line of code, which calls the above training function) # This function *also* saves the model state dictionary train(model, train_loader, args.epochs, optimizer, criterion, device)
class Trainer():
    """Distillation-style trainer.

    A trainable "fadding" SimpleNet is matched (final output and each hidden
    activation, via MSE) to a frozen "fixed" copy loaded from the same
    checkpoint, while the weight on the missing feature is annealed toward
    zero as epochs increase.
    """

    def __init__(self, device, trainData, validData, args):
        self.device = device
        # per-epoch metrics, dumped to <model_dir>/history.json
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData
        # trainable model, initialised from a pre-trained checkpoint
        self.fadding_model = SimpleNet(input_size=9,
                                       output_size=12,
                                       hidden_size=args.hidden_size).to(device)
        self.fadding_model.load_state_dict(
            torch.load("model0.33/model.pkl.904"))
        # frozen reference model, loaded from the same checkpoint
        self.fixed_model = SimpleNet(input_size=9,
                                     output_size=12,
                                     hidden_size=args.hidden_size).to(device)
        self.fixed_model.load_state_dict(torch.load("model0.33/model.pkl.904"))
        # MSE is used both for hidden-state matching and output matching
        self.criteria = torch.nn.MSELoss()
        self.opt = torch.optim.AdamW(self.fadding_model.parameters(),
                                     lr=8e-5,
                                     weight_decay=9e-3)
        # self.scheduler = torch.optim.lr_scheduler.StepLR(self.opt, step_size=200, gamma=args.step_lr)
        self.batch_size = args.batch_size
        self.model_dir = args.arch
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        # best zero-input-branch accuracy seen on validation so far
        self.best_val = 0.0

    def run_epoch(self, epoch, training):
        """Run one pass over train (training=True) or valid data."""
        self.fadding_model.train(training)
        self.fixed_model.train(False)  # reference model stays in eval mode
        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        dataloader = DataLoader(dataset=dataset,
                                batch_size=self.batch_size,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description)
        loss = 0
        acc_fadding = accuracy()
        acc_fixed = accuracy()
        for i, (ft, missing_ft, labels) in trange:
            ft = ft.to(self.device)
            missing_ft = missing_ft.to(self.device)
            labels = labels.to(self.device)
            # anneal the missing feature toward zero: 0.9**sqrt(epoch*100)
            missing_fadding_ft = missing_ft * (0.9**((epoch * 100)**(1 / 2)))
            missing_0_ft = missing_ft * 0
            fadding_ft = torch.cat([missing_fadding_ft, ft], dim=1)
            zero_ft = torch.cat([missing_0_ft, ft], dim=1)
            raw_ft = torch.cat([missing_ft, ft], dim=1)
            fadding_out, fadding_hiddens = self.fadding_model(fadding_ft)
            zero_out, _ = self.fadding_model(zero_ft)
            raw_out, raw_hiddens = self.fixed_model(raw_ft)
            # MSE between each hidden-layer pair, plus the output pair
            batch_loss = 0
            for raw_hidden, fadding_hidden in zip(raw_hiddens, fadding_hiddens):
                batch_loss += self.criteria(raw_hidden, fadding_hidden)
            batch_loss += self.criteria(raw_out, fadding_out)
            if training:
                self.opt.zero_grad()
                batch_loss.backward()
                self.opt.step()
            loss += batch_loss.item()
            acc_fadding.update(fadding_out, labels)
            acc_fixed.update(zero_out, labels)
            trange.set_postfix(loss=loss / (i + 1),
                               acc_fadding=acc_fadding.print_score(),
                               acc_fixed=acc_fixed.print_score())
        # self.scheduler.step()
        if training:
            self.history['train'].append({
                'acc-fadding': acc_fadding.get_score(),
                'acc_fixed': acc_fixed.get_score(),
                'loss': loss / len(trange)
            })
            self.save_hist()
        else:
            self.history['valid'].append({
                'acc-fadding': acc_fadding.get_score(),
                'acc_fixed': acc_fixed.get_score(),
                'loss': loss / len(trange)
            })
            self.save_hist()
            # track/checkpoint on the zero-input branch's validation accuracy
            if acc_fixed.get_score() > self.best_val:
                self.best_val = acc_fixed.get_score()
                self.save_best(epoch)

    def run_iter(self, x, y):
        """Forward one batch through the trainable model; return (out, loss).

        FIX(review): this method referenced nonexistent ``self.model`` (the
        class only defines fadding_model/fixed_model) and raised
        AttributeError whenever called; it now uses ``self.fadding_model``.
        """
        features = x.to(self.device)
        labels = y.to(self.device)
        o_labels, hiddens = self.fadding_model(features)
        l_loss = self.criteria(o_labels, labels)
        return o_labels, l_loss

    def save_best(self, epoch):
        # checkpoint the trainable model's weights for this epoch
        torch.save(self.fadding_model.state_dict(),
                   self.model_dir + '/model.pkl.' + str(epoch))

    def save_hist(self):
        # persist the metric history as pretty-printed JSON
        with open(self.model_dir + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)
args = parser.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Using device {}.".format(device)) torch.manual_seed(args.seed) # Load the training data. train_loader = _get_train_data_loader(args.batch_size, args.data_dir) ## --- Your code here --- ## ## TODO: Build the model by passing in the input params # To get params from the parser, call args.argument_name, ex. args.epochs or ards.hidden_dim # Don't forget to move your model .to(device) to move to GPU , if appropriate model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(device) ## TODO: Define an optimizer and loss function for training optimizer = optim.Adam(model.parameters(), lr=args.lr) criterion = nn.BCELoss() optimizer = optim.Adam(model.parameters(), lr=args.lr) # Trains the model (given line of code, which calls the above training function) train(model, train_loader, args.epochs, criterion, optimizer, device) ## TODO: complete in the model_info by adding three argument names, the first is given # Keep the keys of this dictionary as they are model_info_path = os.path.join(args.model_dir, 'model_info.pth') with open(model_info_path, 'wb') as f: model_info = {
class Trainer():
    """Trains two SimpleNet classifiers jointly.

    Classifier A sees only the 8 observed features; classifier B sees all 9
    (observed + missing). A is additionally pulled toward B's hidden
    activations with an MSE term, so A learns to mimic the better-informed B.
    """

    def __init__(self, trainData, validData, hidden_size, device, model_dir="model"):
        # per-epoch metrics, dumped to <model_dir>/history.json
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData
        # classifier A: observed features only (input_size=8)
        self.classficationA = SimpleNet(input_size=8,
                                        output_size=12,
                                        hidden_size=hidden_size).to(device)
        # classifier B: observed + missing feature (input_size=9)
        self.classficationB = SimpleNet(input_size=9,
                                        output_size=12,
                                        hidden_size=hidden_size).to(device)
        self.criterion = nn.CrossEntropyLoss()
        # used only for the hidden-state matching term between A and B
        self.mse_loss = nn.MSELoss()
        self.opt_C_A = torch.optim.Adam(self.classficationA.parameters(), lr=1e-4)
        self.opt_C_B = torch.optim.Adam(self.classficationB.parameters(), lr=1e-4)
        self.device = device
        self.model_dir = model_dir
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        # best validation accuracy of classifier A seen so far
        self.best_val = 0.0

    def run_epoch(self, epoch, training):
        """Run one pass over train (training=True) or valid data."""
        self.classficationA.train(training)
        self.classficationB.train(training)
        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        # batch size is hard-coded to 256 here (not taken from any args)
        dataloader = DataLoader(dataset=dataset,
                                batch_size=256,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description)
        mse_loss = 0
        lossA = 0
        lossB = 0
        accA = accuracy()
        accB = accuracy()
        for i, (ft, missing_ft, labels) in trange:
            ft = ft.to(self.device)
            missing_ft = missing_ft.to(self.device)
            # B's input is the concatenation of observed and missing features
            all_ft = torch.cat([ft, missing_ft], dim=1)
            labels = labels.to(self.device)
            # ------------------
            #  Train ClassifierA
            # ------------------
            missing_out, missing_hidden_out = self.classficationA(ft)
            all_out, all_hidden_out = self.classficationB(all_ft)
            batch_loss = self.criterion(missing_out, labels)
            # hidden-matching term: MSE between each of A's and B's hidden layers
            batch_mse_loss = 0
            for missing_hidden, all_hidden in zip(missing_hidden_out, all_hidden_out):
                batch_mse_loss += self.mse_loss(missing_hidden, all_hidden)
            mse_loss += batch_mse_loss.item()
            if training:
                # only A's params are stepped here; any gradient spilled into B
                # is cleared below by opt_C_B.zero_grad() before B's own step
                self.opt_C_A.zero_grad()
                (batch_mse_loss + batch_loss).backward()
                self.opt_C_A.step()
            lossA += batch_loss.item()
            accA.update(missing_out, labels)
            # ------------------
            #  Train ClassifierB
            # ------------------
            all_out, _ = self.classficationB(all_ft)
            batch_loss = self.criterion(all_out, labels)
            if training:
                self.opt_C_B.zero_grad()
                batch_loss.backward()
                self.opt_C_B.step()
            lossB += batch_loss.item()
            accB.update(all_out, labels)
            trange.set_postfix(accA=accA.print_score(),
                               accB=accB.print_score(),
                               lossA=lossA / (i + 1),
                               lossB=lossB / (i + 1),
                               mseLoss=mse_loss / (i + 1))
        if training:
            self.history['train'].append({
                'accA': accA.get_score(),
                'accB': accB.get_score(),
                'lossA': lossA / len(trange),
                'lossB': lossB / len(trange),
                'mseLoss': mse_loss / len(trange)
            })
            self.save_hist()
        else:
            self.history['valid'].append({
                'accA': accA.get_score(),
                'accB': accB.get_score(),
                'lossA': lossA / len(trange),
                'lossB': lossB / len(trange),
                'mseLoss': mse_loss / len(trange)
            })
            self.save_hist()
            # best model is selected on classifier A's validation accuracy
            if self.best_val < accA.get_score():
                self.best_val = accA.get_score()
                self.save_best(epoch)

    def save_best(self, epoch):
        # checkpoint both classifiers under one file per epoch
        torch.save(
            {
                'classficationA': self.classficationA.state_dict(),
                'classficationB': self.classficationB.state_dict(),
            }, self.model_dir + '/model.pkl.' + str(epoch))

    def save_hist(self):
        # persist the metric history as pretty-printed JSON
        with open(self.model_dir + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)
def main():
    """CLI entry point: optionally train, predict, and/or plot history.

    Flags:
      --do_train   train a Trainer on ../../data/train.csv (90/10 split)
      --do_predict load checkpoint --ckpt and write a submission file
      --do_plot    plot <arch>/history.json
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', default="model", help='architecture (model_dir)')
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--do_plot', action='store_true')
    parser.add_argument('--hidden_size', default=256, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--max_epoch', default=10000, type=int)
    parser.add_argument('--lr', default=1e-3, type=float)
    parser.add_argument('--step_lr', default=0.5, type=float)
    parser.add_argument('--cuda', default=0, type=int)
    parser.add_argument('--ckpt', type=int, help='load pre-trained model epoch')
    args = parser.parse_args()

    if args.do_train:
        dataset = pd.read_csv("../../data/train.csv")
        dataset.drop("Id", axis=1, inplace=True)
        train_set, valid_set = train_test_split(dataset,
                                                test_size=0.1,
                                                random_state=73)
        # F1 is the held-out "missing" feature; F2..F9 are the model inputs
        feature_for_training = ["F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9"]
        feature_for_prediction = ["F1"]
        train = preprocess_samples(train_set, feature_for_training,
                                   feature_for_prediction)
        valid = preprocess_samples(valid_set, feature_for_training,
                                   feature_for_prediction)
        trainData = FeatureDataset(train)
        validData = FeatureDataset(valid)
        device = torch.device(
            'cuda:%d' % args.cuda if torch.cuda.is_available() else 'cpu')
        max_epoch = args.max_epoch
        trainer = Trainer(device, trainData, validData, args)
        for epoch in range(1, max_epoch + 1):
            print('Epoch: {}'.format(epoch))
            # one training pass followed by one validation pass per epoch
            trainer.run_epoch(epoch, True)
            trainer.run_epoch(epoch, False)

    if args.do_predict:
        dataset = pd.read_csv("../../data/test.csv")
        dataset.drop("Id", axis=1, inplace=True)
        feature_for_testing = ["F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9"]
        test = preprocess_samples(dataset, feature_for_testing)
        testData = FeatureDataset(test)
        # NOTE(review): unlike the train branch, this ignores --cuda and uses
        # the default CUDA device -- confirm this is intentional
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = SimpleNet(input_size=9,
                          output_size=12,
                          hidden_size=args.hidden_size)
        model.load_state_dict(
            torch.load('%s/model.pkl.%d' % (args.arch, args.ckpt)))
        model.train(False)
        model.to(device)
        dataloader = DataLoader(dataset=testData,
                                batch_size=args.batch_size,
                                shuffle=False,
                                collate_fn=testData.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc='Predict')
        prediction = []
        for i, (ft, _, y) in trange:
            b = ft.shape[0]
            # the missing feature is zero-filled at test time and prepended
            missing_ft = torch.zeros(b, 1)
            all_ft = torch.cat([missing_ft, ft], dim=1)
            o_labels, _ = model(all_ft.to(device))
            o_labels = torch.argmax(o_labels, axis=1)
            prediction.append(o_labels.to('cpu').numpy().tolist())
        # flatten the per-batch lists into one flat prediction list
        prediction = sum(prediction, [])
        SubmitGenerator(prediction, "../../data/sampleSubmission.csv")

    if args.do_plot:
        plot_history("{file}/history.json".format(file=args.arch))
# Training entry script: builds a SimpleNet, SGD optimizer and BCE loss, then
# delegates to train().
args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# set the seed for generating random numbers
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)

# get train loader
train_loader = _get_train_loader(args.batch_size, args.data_dir)  # data_dir from above..

## DONE: Build the model by passing in the input params
# To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
# Don't forget to move your model .to(device) to move to GPU , if appropriate
model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(
    device)  # device is GPU if available or CPU

# Given: save the parameters used to construct the model
save_model_params(model, args.model_dir)

## DONE: Define an optimizer and loss function for training
optimizer = optim.SGD(model.parameters(), lr=args.lr)
criterion = nn.BCELoss()  # binary cross entropy loss

# Trains the model (given line of code, which calls the above training function)
# This function *also* saves the model state dictionary
train(model, train_loader, args.epochs, optimizer, criterion, device)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # set the seed for generating random numbers torch.manual_seed(args.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(args.seed) # get train loader train_loader = _get_train_loader(args.batch_size, args.data_dir) # data_dir from above.. ## TODO: Build the model by passing in the input params # To get params from the parser, call args.argument_name, ex. args.epochs or ards.hidden_dim # Don't forget to move your model .to(device) to move to GPU , if appropriate model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(device) # Given: save the parameters used to construct the model save_model_params(model, args.model_dir) ## TODO: Define an optimizer and loss function for training optimizer = optim.Adam(model.parameters(), lr=args.lr) if args.optimizer == 'Adam' else optim.SGD(model.parameters(), lr=args.lr) criterion = nn.BCELoss() # Trains the model (given line of code, which calls the above training function) # This function *also* saves the model state dictionary train(model, train_loader, args.epochs, optimizer, criterion, device)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # set the seed for generating random numbers torch.manual_seed(args.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(args.seed) # get train loader train_loader = _get_train_loader(args.batch_size, args.data_dir) # data_dir from above.. ## TODO: Build the model by passing in the input params # To get params from the parser, call args.argument_name, ex. args.epochs or ards.hidden_dim # Don't forget to move your model .to(device) to move to GPU , if appropriate model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(device) # Given: save the parameters used to construct the model save_model_params(model, args.model_dir) ## TODO: Define an optimizer and loss function for training criterion = nn.BCELoss() # specify optimizer (stochastic gradient descent) and learning rate = 0.01 optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) # Trains the model (given line of code, which calls the above training function) # This function *also* saves the model state dictionary train(model, train_loader, args.epochs, optimizer, criterion, device)
# Training entry script: builds a SimpleNet, SGD+momentum optimizer and BCE
# loss, then delegates to train().
args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# set the seed for generating random numbers
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)

# get train loader
train_loader = _get_train_loader(args.batch_size, args.data_dir)  # data_dir from above..

## TODO: Build the model by passing in the input params
# To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
# FIX(review): the model was constructed but never moved to `device`, unlike
# every sibling script in this file; on a CUDA machine training would hit
# device-mismatched tensors. Added .to(device).
model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(device)

# Given: save the parameters used to construct the model
save_model_params(model, args.model_dir)

## TODO: Define an optimizer and loss function for training
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
criterion = nn.BCELoss()

# Trains the model (given line of code, which calls the above training function)
# This function *also* saves the model state dictionary
train(model, train_loader, args.epochs, optimizer, criterion, device)
num_workers=12) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=12) test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=12) train_data_loader = {'train': train_loader, 'val': val_loader} #%% 2| Model - Preparation device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") model = SimpleNet() model = model.to(device) criterion = torch.nn.CrossEntropyLoss() optimizer_ft = optim.SGD( model.parameters(), lr=learning_rate, momentum=0.9) # Observe that all parameters are being optimized lr_scheduler = lr_scheduler.StepLR( optimizer_ft, step_size=20, gamma=0.1) # Decay LR by a factor of 0.1 every 7 epochs #%% 3| Model - Training model_trained = train_model(device=device, data_loaders=train_data_loader,
# Training entry script: builds a SimpleNet, Adam optimizer and BCE loss, then
# delegates to train().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# set the seed for generating random numbers
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)

# get train loader
train_loader = _get_train_loader(args.batch_size, args.data_dir)  # data_dir from above..

## TODO: Build the model by passing in the input params
# To get params from the parser, call args.argument_name, ex. args.epochs or args.hidden_dim
# Don't forget to move your model .to(device) to move to GPU , if appropriate
model = SimpleNet(args.input_dim, args.hidden_dim, args.output_dim).to(device)

# Given: save the parameters used to construct the model
save_model_params(model, args.model_dir)

## TODO: Define an optimizer and loss function for training
# overall good results
# FIX(review): `lr=agrs.lr` was a typo that raised NameError at runtime.
optimizer = optim.Adam(model.parameters(), lr=args.lr)
criterion = nn.BCELoss()

# Trains the model (given line of code, which calls the above training function)
# This function *also* saves the model state dictionary
train(model, train_loader, args.epochs, optimizer, criterion, device)
class Trainer():
    """Plain supervised trainer for a single SimpleNet classifier (AdamW)."""

    def __init__(self, device, trainData, validData, args):
        self.device = device
        # per-epoch metrics, dumped to <model_dir>/history.json
        self.history = {'train': [], 'valid': []}
        self.trainData = trainData
        self.validData = validData
        self.model = SimpleNet(input_size=9,
                               output_size=12,
                               hidden_size=args.hidden_size).to(device)
        self.criteria = torch.nn.CrossEntropyLoss()
        self.opt = torch.optim.AdamW(self.model.parameters(),
                                     lr=args.lr,
                                     weight_decay=3.3e-1)
        # FIX(review): removed the redundant chained `scheduler =` local
        # binding (`self.scheduler = scheduler = ...`), which served no purpose.
        # NOTE(review): the scheduler is never .step()ped in run_epoch, so the
        # LR decay currently has no effect -- confirm whether that is intended.
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.opt,
                                                         step_size=200,
                                                         gamma=args.step_lr)
        self.batch_size = args.batch_size
        self.model_dir = args.arch
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        # best validation accuracy seen so far
        self.best_val = 0.0

    def run_epoch(self, epoch, training):
        """Run one pass over train (training=True) or valid data."""
        self.model.train(training)
        if training:
            description = 'Train'
            dataset = self.trainData
            shuffle = True
        else:
            description = 'Valid'
            dataset = self.validData
            shuffle = False
        dataloader = DataLoader(dataset=dataset,
                                batch_size=self.batch_size,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=4)
        trange = tqdm(enumerate(dataloader),
                      total=len(dataloader),
                      desc=description)
        loss = 0
        acc = accuracy()
        # batches yield (features, missing_features, labels); the middle
        # element is unused by this trainer
        for i, (x, _, y) in trange:
            o_labels, batch_loss = self.run_iter(x, y)
            if training:
                self.opt.zero_grad()
                batch_loss.backward()
                self.opt.step()
            loss += batch_loss.item()
            acc.update(o_labels.cpu(), y)
            trange.set_postfix(loss=loss / (i + 1), acc=acc.print_score())
        if training:
            self.history['train'].append({'acc': acc.get_score(),
                                          'loss': loss / len(trange)})
            self.save_hist()
        else:
            self.history['valid'].append({'acc': acc.get_score(),
                                          'loss': loss / len(trange)})
            self.save_hist()
            if acc.get_score() > self.best_val:
                self.best_val = acc.get_score()
                self.save_best(epoch)

    def run_iter(self, x, y):
        """Forward one batch; return (predictions, loss)."""
        features = x.to(self.device)
        labels = y.to(self.device)
        # NOTE(review): assumes this SimpleNet returns logits only; sibling
        # trainers in this file unpack (out, hiddens) from theirs -- confirm.
        o_labels = self.model(features)
        l_loss = self.criteria(o_labels, labels)
        return o_labels, l_loss

    def save_best(self, epoch):
        # checkpoint the model weights for this epoch
        torch.save(self.model.state_dict(),
                   self.model_dir + '/model.pkl.' + str(epoch))

    def save_hist(self):
        # persist the metric history as pretty-printed JSON
        with open(self.model_dir + '/history.json', 'w') as f:
            json.dump(self.history, f, indent=4)