def __init__(self, gamma, epsilon, lr, n_actions, input_dims, mem_size,
             batch_size, eps_min=0.01, eps_dec=5e-7, replace=1000,
             chkpt_dir='tmp/dueling_ddqn'):
    self.gamma = gamma
    self.epsilon = epsilon
    self.lr = lr
    self.n_actions = n_actions
    self.input_dims = input_dims
    self.batch_size = batch_size
    self.eps_min = eps_min
    self.eps_dec = eps_dec
    self.replace_target_cnt = replace
    self.chkpt_dir = chkpt_dir
    self.action_space = [i for i in range(self.n_actions)]
    self.learn_step_counter = 0

    self.memory = ReplayBuffer(mem_size, input_dims, n_actions)

    self.q_eval = Network(self.lr, self.n_actions,
                          input_dims=self.input_dims,
                          name='lunar_lander_dueling_ddqn_q_eval',
                          chkpt_dir=self.chkpt_dir)
    self.q_next = Network(self.lr, self.n_actions,
                          input_dims=self.input_dims,
                          name='lunar_lander_dueling_ddqn_q_next',
                          chkpt_dir=self.chkpt_dir)
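# Usage sketch (assumptions: this __init__ belongs to a class named Agent,
# and the hyperparameter values below are illustrative, not from the source).
agent = Agent(gamma=0.99, epsilon=1.0, lr=5e-4, n_actions=4,
              input_dims=(8,), mem_size=100_000, batch_size=64)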
def __init__(self, env, args):
    self.env = env
    self.args = args
    # define the network
    self.net = Network(self.env.observation_space.shape[0],
                       self.env.action_space.shape[0])
    self.old_net = Network(self.env.observation_space.shape[0],
                           self.env.action_space.shape[0])
    # make sure the net and the old net have the same parameters
    self.old_net.load_state_dict(self.net.state_dict())
    # define the optimizer (only the critic's parameters are updated by Adam)
    self.optimizer = torch.optim.Adam(self.net.critic.parameters(),
                                      lr=self.args.lr)
    # define the running mean filter
    self.running_state = ZFilter((self.env.observation_space.shape[0], ),
                                 clip=5)
    if not os.path.exists(self.args.save_dir):
        os.mkdir(self.args.save_dir)
    self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
    if not os.path.exists(self.model_path):
        os.mkdir(self.model_path)
    self.start_episode = 0
def __init__(self, state_size, action_size, seed):
    self.state_size = state_size
    self.action_size = action_size
    # random.seed() returns None, so keep the integer seed separately
    # for the networks and the replay buffer
    random.seed(seed)
    self.seed = seed
    # online network and target network
    self.Q = Network(self.state_size, self.action_size, self.seed)
    self.Q_dash = Network(self.state_size, self.action_size, self.seed)
    self.optimizer = optim.Adam(self.Q.parameters(), lr=LR)
    self.replay = ReplayBuffer(self.seed)
    self.t_step = 0
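# Sketch (an assumption, not from the source): with an online network Q and a
# target network Q_dash as above, DQN variants periodically sync the target,
# either with a hard copy or a Polyak soft update with rate tau.
def soft_update(online, target, tau=1e-3):
    for online_p, target_p in zip(online.parameters(), target.parameters()):
        target_p.data.copy_(tau * online_p.data + (1.0 - tau) * target_p.data)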
def initialize_database(app, db):
    """Drop and restore the database in a consistent state"""
    with app.app_context():
        db.drop_all()
        db.create_all()
        first_network = Network(name='First Network', site='DEL18DT')
        first_network.sensors.extend([
            Sensor(name='Bulkhead 5 Water Level', value=50),
            Sensor(name='Bulkhead 7 Water Level', value=20),
            Sensor(name='Bulkhead 2 Water Level', value=40)
        ])
        second_network = Network(name='Second Network', site='DEL23DT')
        second_network.sensors.extend([
            Sensor(name='Rain Sensor Front Level', value=250),
            Sensor(name='Rain Sensor Back Level', value=620)
        ])
        db.session.add(first_network)
        db.session.add(second_network)
        db.session.commit()
def muzero(config: MuZeroConfig):
    # Create core objects
    shared_storage = SharedStorage.remote()
    # TODO: Decide whether to use CPU or GPU for actor networks
    initial_network = Network(config.obs_shape, config.action_space_size,
                              device='cpu')
    shared_storage.save_network.remote(0, initial_network)
    replay_buffer = ReplayBuffer.remote(config)
    writer = TensorboardLogger.remote()

    # Spin up actor processes
    sim_processes = []
    for i in range(config.num_actors):
        logging.debug('Launching actor #{}'.format(i + 1))
        proc = launch_actor_process.remote(config, shared_storage,
                                           replay_buffer, i, writer)
        sim_processes.append(proc)

    launch_trainer_process.remote(config, shared_storage, replay_buffer,
                                  writer)

    # Periodically report the replay buffer size
    while True:
        buffer_size = ray.get(replay_buffer.get_buffer_size.remote())
        logging.debug('Buffer size: {}'.format(buffer_size))
        time.sleep(20)
def main():
    args = get_args()
    data_dir = "../data/"

    ## data preparation
    _, valid_loader = data.load_data(data_dir=data_dir,
                                     input_size=224,
                                     batch_size=args.batch_size,
                                     augmentation=args.augmentation)

    print('Computing t-SNE embedding')
    tsne = TSNE(n_components=2)
    t0 = time()

    pretrained_model = Network(20).to(args.device)
    pretrained_model.load_state_dict(torch.load('tsne.pt'))

    outputs = []
    label_list = []
    for inputs, labels in valid_loader:
        inputs = inputs.to(args.device)
        output = forward(pretrained_model, inputs)
        outputs.append(output.cpu().detach().numpy().astype(np.float64))
        label_list.append(labels)
    output = np.concatenate(outputs, axis=0)
    labels = np.concatenate(label_list, axis=0)

    result = tsne.fit_transform(output)
    plot_embedding(
        result, labels,
        't-SNE embedding of the 20 classes (time %.2fs)' % (time() - t0))
def post(self):
    """ Create a new network """
    if "Administrator" != current_user.role:
        return make_response(jsonify({"msg": "Forbidden"}), 403)

    id = request.json.get("id")
    name = request.json.get("name")

    network = Network.query.filter_by(name=name).first()
    if not network:
        # If no network exists with that name, then create a new one
        network = Network(id=id, name=name)
        db.session.add(network)
        db.session.commit()
        ret = {'msg': 'Success'}
        return make_response(jsonify(ret), 200)
    else:
        return make_response(
            jsonify({
                "msg": "Network with that name already exists, "
                       "please try again with a new name."
            }), 400)
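# Client-side sketch (assumptions: the resource above is mounted at /networks
# on a local dev server, and auth is via a bearer token; both are illustrative,
# not from the source).
import requests

resp = requests.post("http://localhost:5000/networks",
                     json={"id": 1, "name": "lab-net"},
                     headers={"Authorization": "Bearer <token>"})
print(resp.status_code, resp.json().get("msg"))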
def main(args):
    # random seed
    seed = 1234
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # load dataset
    if args.dataset == 'deepfashion':
        ds = pd.read_csv(os.path.join(args.data_dir, 'info/df_info.csv'))
        from dataset import DeepFashionDataset as DataManager
    elif args.dataset == 'fld':
        ds = pd.read_csv(os.path.join(args.data_dir, 'info/fld_info.csv'))
        from dataset import FLDDataset as DataManager
    else:
        raise ValueError
    print('dataset : %s' % (args.dataset))

    if not args.evaluate:
        train_dm = DataManager(ds[ds['evaluation_status'] == 'train'],
                               root=args.data_dir)
        train_dl = DataLoader(train_dm, batch_size=args.batchsize,
                              shuffle=True)
        if os.path.exists('models') is False:
            os.makedirs('models')

    test_dm = DataManager(ds[ds['evaluation_status'] == 'test'],
                          root=args.data_dir)
    test_dl = DataLoader(test_dm, batch_size=args.batchsize, shuffle=False)

    # Load model
    print("Load the model...")
    net = torch.nn.DataParallel(Network(dataset=args.dataset, flag=args.glem))
    if torch.cuda.is_available():
        net = net.cuda()

    if args.weight_file is not None:
        weights = torch.load(args.weight_file)
        if args.update_weight:
            weights = utils.load_weight(net, weights)
        net.load_state_dict(weights)

    # evaluate only
    if args.evaluate:
        print("Evaluation only")
        test(net, test_dl, 0)
        return

    # learning parameters
    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 5, 0.1)

    print("Start training")
    for epoch in range(args.epoch):
        train(net, optimizer, train_dl, epoch)
        test(net, test_dl, epoch)
        # step the scheduler after the epoch's optimizer updates
        # (stepping before training skips the initial learning rate)
        lr_scheduler.step()
def network(*layers: Union[Layer, Iterable[Layer]]) -> Network:
    # flatten a mix of single layers and iterables of layers
    actual = []
    for layer in layers:
        if isinstance(layer, Layer.cls):
            actual.append(layer)
        else:
            actual.extend(layer)
    return Network(*actual)
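# Usage sketch (Dense here is a hypothetical Layer subclass, used only to
# illustrate that single layers and iterables of layers can be mixed freely):
net = network(Dense(128), [Dense(64), Dense(64)], Dense(10))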
def main():
    # random seed
    seed = 1234
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # load dataset
    if args.dataset[0] == 'deepfashion':
        ds = pd.read_csv('./Anno/df_info.csv')
        from dataset import DeepFashionDataset as DataManager
    elif args.dataset[0] == 'fld':
        ds = pd.read_csv('./Anno/fld_info.csv')
        from dataset import FLDDataset as DataManager
    else:
        raise ValueError
    print('dataset : %s' % (args.dataset[0]))

    if not args.evaluate:
        train_dm = DataManager(ds[ds['evaluation_status'] == 'train'],
                               root=args.root)
        train_dl = DataLoader(train_dm, batch_size=args.batchsize,
                              shuffle=True)
        if os.path.exists('models') is False:
            os.makedirs('models')

    test_dm = DataManager(ds[ds['evaluation_status'] == 'test'],
                          root=args.root)
    test_dl = DataLoader(test_dm, batch_size=args.batchsize, shuffle=False)

    # Load the two model variants (flag=1 and flag=0)
    print("Load the model...")
    net_cca = torch.nn.DataParallel(Network(dataset=args.dataset, flag=1)).cuda()
    net_fpn = torch.nn.DataParallel(Network(dataset=args.dataset, flag=0)).cuda()
    weights = torch.load(weight_cca)
    net_cca.load_state_dict(weights)
    weights = torch.load(weight_fpn)
    net_fpn.load_state_dict(weights)
    # print('net:\n' + str(net.module))  # TEST

    print("Prediction only")
    predict(net_cca, net_fpn, test_dl, 0)
def generate_network(data):
    # generate a mapping of member objects keyed by name
    members = dict()
    for name, value in data.items():
        members[name] = Member(value, raw=True)

    # generate network
    network = Network(members)
    return network
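# Usage sketch (the payload shape is an assumption inferred from the loop
# above: any mapping of member names to raw values should work, given
# Member(value, raw=True)).
net = generate_network({"alice": 0.5, "bob": 1.25})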
def get_context(file_path: str) -> Context:
    with open(file_path, 'r') as file:
        context_dict = json.load(file)

    context = Context(**context_dict)
    for i in range(len(context.containers)):
        container = context.containers[i] = Container(**context.containers[i])
        container.ports = Port(**container.ports)
    for i in range(len(context.networks)):
        context.networks[i] = Network(**context.networks[i])
    return context
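# Illustrative input (an assumption inferred from the loader above: the JSON
# carries Context fields plus lists of container and network dicts; the exact
# field names inside each dict depend on the Container/Port/Network classes):
# {
#   "containers": [{"name": "web", "ports": {"host": 8080, "container": 80}}],
#   "networks": [{"name": "backend"}]
# }
# context = get_context("context.json")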
def allocate_subnet(self, additional_mask_bits, name):
    from .rest_controller import RestController
    import ipaddress as ip

    rest = RestController()
    net = rest.get_instance(resource='network', resource_id=self.network_id)
    # bail out early if the network was not found (the original checked
    # type(net) is None, which is never true)
    if net is None:
        return None
    network = Network(**net)

    used_sbns = list(map(lambda x: ip.IPv4Network(x.cidr), network.subnets))
    n = ip.IPv4Network(network.cidr)
    # candidate subnets after extending the prefix by additional_mask_bits
    psns = list(n.subnets(int(additional_mask_bits)))
    # drop candidates that overlap an already-allocated subnet
    for sbn in used_sbns:
        psns = list(filter(lambda x: not sbn.overlaps(x), psns))
    subnet_cidr = str(psns[0].compressed)
    return subnet_cidr
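# Standalone sketch of the allocation logic above using only the stdlib
# (the CIDRs are illustrative): split 10.0.0.0/16 into /18 candidates and
# skip the ones overlapping an already-used subnet.
import ipaddress as ip

parent = ip.IPv4Network("10.0.0.0/16")
used = [ip.IPv4Network("10.0.0.0/18")]
candidates = list(parent.subnets(2))          # /16 + 2 bits -> four /18s
free = [c for c in candidates if not any(u.overlaps(c) for u in used)]
print(free[0].compressed)                     # 10.0.64.0/18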
def main_unsupervised_new(used_labels=None):
    trainset = MNIST('train', used_labels)
    validset = MNIST('valid', used_labels)

    net = Network(trainset.n_classes, feature_size=128)
    params = net.parameters()

    criterion = LossUnsupervisedNew()
    optimizer = optim.SGD
    lr_scheduler = MultiStepLR

    trainer = SupervisedTrainer(configer, net, params, trainset, validset,
                                criterion, optimizer, lr_scheduler,
                                num_to_keep=5, resume=False, valid_freq=1,
                                show_embedding=True)
    trainer.train()
    del trainer
def new_network():
    ''' Create new network '''
    form = NetworkForm()
    form.servers.choices = [(s.name, s.name + " - " + s.description)
                            for s in Server.query.order_by('name')]
    if form.validate_on_submit():
        my_network = Network()
        form.populate_obj(my_network)
        my_network.servers = ",".join(my_network.servers)
        db.session.add(my_network)
        try:
            db.session.commit()
            # User info
            flash('Network created correctly', 'success')
            return redirect(url_for('networks'))
        except Exception:
            db.session.rollback()
            flash('Error generating network.', 'danger')
    # Fall through here on GET, failed validation, or a commit error
    return render_template('web/new_network.html', form=form)
# Download the model and its corresponding metadata from a URL
# model_path, results_path = download_model(model_url)

# Read the results CSV
# df = pd.read_csv(results_path)

# First row in the CSV, which contains the network's parameters
# row = df.iloc[0]

# In[4]:

#%% Network

# Initialize network
model = Network().construct(net, row)
model = model.eval()

# Load trained model
state_dict = torch.load(model_path,
                        map_location=lambda storage, loc: storage)
model.load_state_dict(state_dict, strict=False)
model = model.to(device)

gpus = torch.cuda.device_count()
if gpus > 1:
    print("Let's use", gpus, "GPUs!")
    model = nn.DataParallel(model, device_ids=range(gpus))

# In[5]:

#%% Dataset
def run(test_dir, test_srcs, checkpoint, vocab, out="captions.out.txt",
        batch_size=16, max_seq_len=MAX_LEN, hidden_dim=HIDDEN_DIM,
        emb_dim=EMB_DIM, enc_seq_len=ENC_SEQ_LEN, enc_dim=ENC_DIM,
        attn_activation="relu", deep_out=False, decoder=4, attention=3):
    # map the integer flags onto decoder / attention classes
    if decoder == 1:
        decoder = models.AttentionDecoder_1
    elif decoder == 2:
        decoder = models.AttentionDecoder_2
    elif decoder == 3:
        decoder = models.AttentionDecoder_3
    elif decoder == 4:
        decoder = models.AttentionDecoder_4

    if attention == 1:
        attention = attentions.AdditiveAttention
    elif attention == 2:
        attention = attentions.GeneralAttention
    elif attention == 3:
        attention = attentions.ScaledGeneralAttention

    # load vocabulary
    vocabulary = Vocab()
    vocabulary.load(vocab)

    # load test instance file paths
    srcs = open(test_srcs).read().strip().split('\n')
    srcs = [os.path.join(test_dir, s) for s in srcs]

    # load model
    net = Network(hid_dim=hidden_dim, out_dim=vocabulary.n_words,
                  sos_token=0, eos_token=1, pad_token=2,
                  emb_dim=emb_dim, enc_seq_len=enc_seq_len, enc_dim=enc_dim,
                  deep_out=deep_out, attention=attention, decoder=decoder)
    net.to(DEVICE)
    net.load_state_dict(torch.load(checkpoint))
    net.eval()

    with torch.no_grad():
        # run inference
        num_instances = len(srcs)
        i = 0
        captions = []
        while i < num_instances:
            srcs_batch = srcs[i:i + batch_size]
            batch = _load_batch(srcs_batch)
            batch = batch.to(DEVICE)
            tokens, _ = net(batch, targets=None, max_len=max_seq_len)
            tokens = tokens.permute(1, 0, 2).detach()
            _, topi = tokens.topk(1, dim=2)
            topi = topi.squeeze(2)
            # decode token output from the model
            for j in range(len(srcs_batch)):
                c = vocabulary.tensor_to_sentence(topi[j])
                c = ' '.join(c)
                captions.append(c)
            i += len(srcs_batch)

    # write captions out (the original left the file handle unclosed)
    with open(out, mode='w') as out_f:
        for c in captions:
            out_f.write(c + '\n')
# Scalar loss on value/reward targets (the enclosing def was cut off in the
# source; the function name below is assumed).
def scalar_loss(prediction, target):
    return torch.square(target - prediction)


# Should use dim=0 for a single batch, dim=1 for multiple batches
def cross_entropy_with_logits(prediction, target, dim=0):
    return -torch.sum(target * F.log_softmax(prediction, dim=dim), dim=dim)


if __name__ == '__main__':
    print('Cross Entropy Test:',
          cross_entropy_with_logits(
              torch.tensor([0.0088, 0.1576, -0.0345, -0.0805]),
              torch.tensor([0.0000, 0.1429, 0.4286, 0.4286])))

    in_shape = (8 * 3, 96, 96)
    action_space_size = 4
    network = Network(in_shape, action_space_size, 'cuda')

    batch_size = 3
    rollout_len = 5
    batch = []
    for i in range(batch_size):
        img = np.ones(in_shape)
        actions = [Action(2) for _ in range(rollout_len)]
        # (value, reward, empirical_policy)
        targets = [(0.7, 0.5, [0.25, 0.25, 0.25, 0.25])
                   for _ in range(rollout_len + 1)]
        batch.append((img, actions, targets))

    optimizer = optim.SGD(network.parameters(),
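# Batched sketch of the dim argument (shapes are illustrative): with a
# (batch, classes) logits tensor, pass dim=1 so log_softmax and the sum both
# run over the class axis.
logits = torch.zeros(2, 4)                      # two uniform predictions
target = torch.full((2, 4), 0.25)               # uniform target policy
print(cross_entropy_with_logits(logits, target, dim=1))  # ~1.3863 each (ln 4)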
from models import db, Network, Server
from faker import Factory
from random import randint

fake = Factory.create()
# Spanish
#fake = Factory.create('es_ES')

# Reload tables
db.drop_all()
db.create_all()

# Make 10 fake networks
for num in range(10):
    name = "net-" + fake.sentence(nb_words=2, variable_nb_words=False,
                                  ext_word_list=None) \
        .replace(' ', '-').replace('.', '').lower()
    cidr = fake.ipv4_private(network=True, address_class='a')
    servers = "server1,server2"
    vlanid = randint(1000, 1500)
    options = "DI"
    # Save in database
    mi_network = Network(name=name, cidr=cidr, servers=servers,
                         vlanid=vlanid, options=options)
    db.session.add(mi_network)

mi_server = Server(name='server1', description='descserver',
                   mac_address='00:01:02:03:04:05')
db.session.add(mi_server)
db.session.commit()
def list():
    networks = [Network(network) for network in cli.networks()]
    return networks_list('networks/list.html', networks)
# netSubpixel = [Subpixel(intLevel) for intLevel in [2, 3, 4, 5, 6]]
# print()
# for s in netSubpixel:
#     for k, v in s.state_dict().items():
#         print(k + ': ' + str(v.shape))
#     print()

# netRegularization = [Regularization(intLevel) for intLevel in [2, 3, 4, 5, 6]]
# print()
# for r in netRegularization:
#     for k, v in r.state_dict().items():
#         print(k + ": " + str(v.shape))
#     print()

# print("----------------------------------------------------------")

# flownet = Network()
# for k, v in flownet.state_dict().items():
#     print(k + ": " + str(v.shape))

with dg.guard():
    flownet = Network()
    flownet.eval()
    tenFirst = dg.to_variable(
        np.zeros((1, 3, 1024, 1024)).astype("float32"))
    tenSecond = dg.to_variable(
        np.zeros((1, 3, 1024, 1024)).astype("float32"))
    out = flownet(tenFirst, tenSecond)
    print(out.shape)
list_tokenized_validation = tokenizer.texts_to_sequences(test_seg)
# Test data, max length is set to 100
input_validation = tf.keras.preprocessing.sequence.pad_sequences(
    list_tokenized_validation, maxlen=maxlen)

# Network input layers
inputs = tf.keras.Input(shape=(maxlen, ), name='embds_input')
act_input = tf.keras.Input((num_actions, ), name='actions_input')

if MODEL == 'DRL':
    # Create an environment
    ENV = Environment(input_train, y_train)

    # Actor-Critic
    # Train
    critic_model = Network(num_actions, embeddings_matrix, maxlen=maxlen)
    _ = critic_model(inputs=[inputs, act_input])
    critic_model.compile(loss='mse', optimizer='adam')

    # Decision
    actor_q_model = tf.keras.Model(
        inputs=critic_model.input,
        outputs=critic_model.get_layer('q_outputs').output)

    if MODE == 'train':

        def train(samples):
            if len(samples) < BATCH_SIZE:
                return
            samples = np.array(samples)
            op = arch['binary'] if dim == 2 else arch['triple']
            best_op = best_arch['binary'] if dim == 2 else best_arch['triple']
            logging.info(
                'search_epoch: %d finish, arch: %s, rmse: %.4f, arch spent: %.4f'
                % (search_epoch, op, rmse, time() - arch_start))
            logging.info(
                'search_epoch: %d, best_arch: %s, best_rmse: %.4f, time spent: %.4f'
                % (search_epoch, best_op, best_rmse, time() - search_start))

    elif 'darts' in args.mode:
        search_start = time()
        if args.mode == 'binarydarts':
            if dim == 2:
                model = Network(num_users, num_items, args.embedding_dim,
                                args.weight_decay).cuda()
            elif dim == 3:
                model = Network_Triple(num_ps, num_qs, num_rs,
                                       args.embedding_dim,
                                       args.weight_decay).cuda()
        elif args.mode == 'binarydarts_mlp' or args.mode == 'traindarts':
            if dim == 2:
                model = Network_MLP(num_users, num_items, args.embedding_dim,
                                    args.weight_decay).cuda()
            elif dim == 3:
                model = Network_MLP_Triple(num_ps, num_qs, num_rs,
                                           args.embedding_dim,
                                           args.weight_decay).cuda()
        elif args.mode == 'darts':
            if dim == 2:
                model = Network_MLP(num_users,
def main():
    # random seed
    seed = 1234
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # load dataset
    from dataset_df2_loader import DeepFashionDataset as DataManager
    with open("./data/train/deepfashion2.json", 'r') as infile:
        ds = json.load(infile)
    ds = ds['annotations'][0:5]
    print("dataset", len(ds), args.batchsize, args.epoch)
    print('dataset : %s' % (args.dataset[0]))

    if not args.evaluate:
        train_dm = DataManager(ds, root=args.root)
        train_dl = DataLoader(train_dm, batch_size=args.batchsize,
                              shuffle=True)
        if os.path.exists('models') is False:
            os.makedirs('models')

    with open("./data/validation/deepfashion2_datafile_8.json", 'r') as infile:
        test_data = json.load(infile)
    test_dm = DataManager(
        test_data['annotations'][0:5],
        root="/media/chintu/bharath_ext_hdd/Bharath/Segmentation/Landmark detection/GLE_FLD-master/data/validation/image/"
    )
    test_dl = DataLoader(test_dm, batch_size=args.batchsize, shuffle=False)

    # Load model
    print("Load the model...")
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("device:", device)
    net = torch.nn.DataParallel(Network(dataset=args.dataset,
                                        flag=args.glem)).to(device)

    if args.weight_file is not None:
        weights = torch.load(args.weight_file)
        if args.update_weight:
            weights = utils.load_weight(net, weights)
        net.load_state_dict(weights)

    # evaluate only
    if args.evaluate:
        print("Evaluation only")
        test(net, test_dl, 0)
        return

    # learning parameters
    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 5, 0.1)

    print('Start training')
    for epoch in range(args.epoch):
        train(net, optimizer, train_dl, epoch)
        test(net, test_dl, epoch)
        # step the scheduler after the epoch's optimizer updates
        lr_scheduler.step()
def get_model(args):
    model = Network()
    state_dict, _ = F.load_dygraph(args.pretrained_model)
    model.load_dict(state_dict)
    return model
src = (Path(__file__) / "..").resolve()
for f in glob(str(src / "*.py")):
    shutil.copy(f, dst)


if __name__ == "__main__":
    wandb_logger = WandbLogger(project="nnsplit")

    parser = ArgumentParser()
    parser = Network.add_model_specific_args(parser)
    parser = Trainer.add_argparse_args(parser)
    parser.set_defaults(
        gpus=1,
        max_epochs=1,
        reload_dataloaders_every_epoch=True,
        logger=wandb_logger,
    )
    hparams = parser.parse_args()

    if hparams.logger:
        store_code(wandb_logger.experiment)

    model = Network(hparams)
    n_params = np.sum([np.prod(x.shape) for x in model.parameters()])

    trainer = Trainer.from_argparse_args(hparams)
    print(f"Training model with {n_params} parameters.")
    trainer.fit(model)

    if hparams.logger:
        model.store(Path(wandb_logger.experiment.dir) / "model")
def main(args):
    # Device
    device = ("cuda" if torch.cuda.is_available() else "cpu")
    print("Device: ", device)

    # Transform, Dataset and Dataloaders
    transform = transforms.Compose([transforms.ToTensor()])
    train_datasets, test_datasets = {}, {}
    train_loaders, test_loaders = {}, {}
    permute_idx = [i for i in range(28 * 28)]
    for i in range(args.num_tasks):
        train_datasets[i] = PermutedMNIST(transform=transform, train=True,
                                          permute_idx=permute_idx)
        train_loaders[i] = torch.utils.data.DataLoader(
            train_datasets[i], batch_size=args.batch_size, shuffle=True)
        test_datasets[i] = PermutedMNIST(transform=transform, train=False,
                                         permute_idx=permute_idx)
        test_loaders[i] = torch.utils.data.DataLoader(
            test_datasets[i], batch_size=args.batch_size, shuffle=True)
        random.shuffle(permute_idx)

    # Model, Optimizer, Criterion, ewc_class if needed
    model = Network().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    ewc_class = None

    # Recorders
    train_losses, train_accs, test_losses, test_accs = {}, {}, {}, {}

    # Train proper
    for i in range(args.num_tasks):
        print("Currently Training on Task {}".format(i + 1))
        curr_task = i

        # Initialize per-task recorders
        train_losses[i], train_accs[i], test_losses[i], test_accs[i] = \
            [], [], [], []

        NUM_EPOCHS = args.num_epochs_per_task
        for epoch in range(NUM_EPOCHS):
            # Train
            train_loss, train_acc = train(model, optimizer, criterion,
                                          train_loaders[i], device,
                                          args.ewc_train, ewc_class,
                                          args.ewc_weight, curr_task)
            print("[Train Epoch {:>5}/{}] loss: {:>0.4f} | acc: {:>0.4f}"
                  .format(epoch, NUM_EPOCHS, train_loss, train_acc))

            # Record loss
            train_losses[i].append(train_loss)
            train_accs[i].append(train_acc)

            # Test on the current task and all previous tasks
            for j in range(i + 1):
                test_loss, test_acc = test(model, criterion,
                                           test_loaders[j], device)
                print("[ Test Epoch {:>5}/{}] loss: {:>0.4f} | acc: {:>0.4f} [Task {}]"
                      .format(epoch, NUM_EPOCHS, test_loss, test_acc, j))
                test_losses[j].append(test_loss)
                test_accs[j].append(test_acc)

        if args.ewc_train:
            # Consolidate weights for the tasks seen so far
            ewc_class = EWC(model, train_loaders, curr_task, device)

    # Save Losses and Accuracies
    suffixes = "{}_{}_{}_{}_{}".format(str(args.num_tasks),
                                       str(args.num_epochs_per_task),
                                       str(args.ewc_weight),
                                       "ewc" if args.ewc_train else "std",
                                       args.custom_suffix)
    torch.save(train_losses, "train_losses_{}.txt".format(suffixes))
    torch.save(train_accs, "train_accs_{}.txt".format(suffixes))
    torch.save(test_losses, "test_losses_{}.txt".format(suffixes))
    torch.save(test_accs, "test_accs_{}.txt".format(suffixes))
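# Sketch of the EWC penalty the loop above relies on (an assumption about the
# EWC class used there, following Kirkpatrick et al. 2017: a quadratic penalty
# weighted by the diagonal Fisher information of earlier tasks). fisher and
# old_params are dicts keyed by parameter name, captured after each task.
def ewc_penalty(model, fisher, old_params, ewc_weight):
    loss = 0.0
    for name, param in model.named_parameters():
        loss = loss + (fisher[name] * (param - old_params[name]) ** 2).sum()
    return 0.5 * ewc_weight * loss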
dir = "./res" render = True env = LunarLander() frames = [] args = parse_args() print(args) is_heuristic = args.heuristic weight_path = args.weight output_path = args.output if not is_heuristic: model = Network(x_shape=env.observation_space.shape[0], y_shape=env.action_space.n, learning_rate=0.02, gamma=0.99, restore_path=weight_path) for i in range(1, 10): total_reward = 0 steps = 0 s = env.reset() epoche_rewards = [] start = time.clock() print("iteration: ", i) while True: env.render() frames.append(Image.fromarray(env.render(mode='rgb_array')))
        im_size = 96
        epc_seed = 0
        row = Config(input_ch=input_ch,
                     padded_im_size=padded_im_size,
                     num_classes=num_classes,
                     im_size=im_size,
                     epc_seed=epc_seed)
    else:
        raise Exception('this was expected to be an unreachable line')

    if args.model in [
            'VGG11', 'VGG11_bn', 'VGG13', 'VGG13_bn', 'VGG16', 'VGG16_bn',
            'VGG19', 'VGG19_bn', 'ResNet18', 'DenseNet3_40', 'MobileNet',
            'LeNet'
    ]:
        model = Network().construct(args.model, row)
    else:
        raise Exception('Unknown model argument: {}'.format(args.model))

    # state_dict = torch.load(model_weights_path,
    #                         map_location=lambda storage, loc: storage)
    # if 'model' in state_dict.keys():
    #     state_dict = state_dict['model']
    # model.load_state_dict(state_dict, strict=True)
    # model = model.to(device)
    # model = model.eval()

    mean, std = get_mean_std(args.dataset)
    pad = int((row.padded_im_size - row.im_size) / 2)
    transform = transforms.Compose([
        transforms.Pad(pad),
def train(train_feats, train_caps, val_feats, val_caps,
          train_prefix="", val_prefix="",
          epochs=EPOCHS, batch_size=BATCH_SIZE, max_seq_len=MAX_LEN,
          hidden_dim=HIDDEN_DIM, emb_dim=EMB_DIM, enc_seq_len=ENC_SEQ_LEN,
          enc_dim=ENC_DIM, clip_val=CLIP_VAL, teacher_force=TEACHER_FORCE_RAT,
          dropout_p=0.1, attn_activation="relu", epsilon=0.0005,
          weight_decay=WEIGHT_DECAY, lr=LEARNING_RATE, early_stopping=True,
          scheduler="step", attention=None, deep_out=False, checkpoint="",
          out_dir="Pytorch_Exp_Out", decoder=None):
    print("EXPERIMENT START ", time.asctime())

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # 1. Load the data
    train_captions = open(train_caps, mode='r', encoding='utf-8') \
        .read().strip().split('\n')
    train_features = open(train_feats, mode='r').read().strip().split('\n')
    train_features = [os.path.join(train_prefix, z) for z in train_features]
    assert len(train_captions) == len(train_features)
    if val_caps:
        val_captions = open(val_caps, mode='r', encoding='utf-8') \
            .read().strip().split('\n')
        val_features = open(val_feats, mode='r').read().strip().split('\n')
        val_features = [os.path.join(val_prefix, z) for z in val_features]
        assert len(val_captions) == len(val_features)

    # 2. Preprocess the data
    train_captions = normalize_strings(train_captions)
    train_data = list(zip(train_captions, train_features))
    train_data = filter_inputs(train_data)
    print("Total training instances: ", len(train_data))
    if val_caps:
        val_captions = normalize_strings(val_captions)
        val_data = list(zip(val_captions, val_features))
        val_data = filter_inputs(val_data)
        print("Total validation instances: ", len(val_data))

    vocab = Vocab()
    vocab.build_vocab(map(lambda x: x[0], train_data), max_size=10000)
    vocab.save(path=os.path.join(out_dir, 'vocab.txt'))
    print("Vocabulary size: ", vocab.n_words)

    # 3. Initialize the network, optimizer & loss function
    net = Network(hid_dim=hidden_dim, out_dim=vocab.n_words,
                  sos_token=0, eos_token=1, pad_token=2,
                  teacher_forcing_rat=teacher_force,
                  emb_dim=emb_dim, enc_seq_len=enc_seq_len, enc_dim=enc_dim,
                  dropout_p=dropout_p, deep_out=deep_out,
                  decoder=decoder, attention=attention)
    net.to(DEVICE)
    if checkpoint:
        net.load_state_dict(torch.load(checkpoint))
    optimizer = torch.optim.Adam(net.parameters(), lr=lr,
                                 weight_decay=weight_decay)
    loss_function = nn.NLLLoss()
    scheduler = set_scheduler(scheduler, optimizer)

    # 4. Train
    prev_val_l = sys.maxsize
    total_instances = 0
    total_steps = 0
    train_loss_log = []
    train_loss_log_batches = []
    train_penalty_log = []
    val_loss_log = []
    val_loss_log_batches = []
    val_bleu_log = []
    # BLEU is a higher-is-better score, so start from -1 so the first epoch
    # always counts as an improvement (the original sys.maxsize init would
    # trigger early stopping after a single epoch)
    prev_bleu = -1

    train_data = DataLoader(captions=map(lambda x: x[0], train_data),
                            sources=map(lambda x: x[1], train_data),
                            batch_size=batch_size,
                            vocab=vocab,
                            max_seq_len=max_seq_len)
    if val_caps:
        val_data = DataLoader(captions=map(lambda x: x[0], val_data),
                              sources=map(lambda x: x[1], val_data),
                              batch_size=batch_size,
                              vocab=vocab,
                              max_seq_len=max_seq_len,
                              val_multiref=True)

    training_start_time = time.time()

    for e in range(1, epochs + 1):
        print("Epoch ", e)
        tfr = _teacher_force(epochs, e, teacher_force)

        # train one epoch
        train_l, inst, steps, t, l_log, pen = train_epoch(
            model=net,
            loss_function=loss_function,
            optimizer=optimizer,
            data_iter=train_data,
            max_len=max_seq_len,
            clip_val=clip_val,
            epsilon=epsilon,
            teacher_forcing_rat=tfr)
        if scheduler is not None:
            scheduler.step()

        # epoch logs
        print("Training loss:\t", train_l)
        print("Instances:\t", inst)
        print("Steps:\t", steps)
        hours = t // 3600
        mins = (t % 3600) // 60
        secs = (t % 60)
        print("Time:\t{0}:{1}:{2}".format(hours, mins, secs))
        total_instances += inst
        total_steps += steps
        train_loss_log.append(train_l)
        train_loss_log_batches += l_log
        train_penalty_log.append(pen)
        print()

        # evaluate
        if val_caps:
            val_l, l_log, bleu = evaluate(model=net,
                                          loss_function=loss_function,
                                          data_iter=val_data,
                                          max_len=max_seq_len,
                                          epsilon=epsilon)

            # validation logs
            print("Validation loss: ", val_l)
            print("Validation BLEU-4: ", bleu)
            if bleu > prev_bleu:
                torch.save(net.state_dict(), os.path.join(out_dir, 'net.pt'))
            val_loss_log.append(val_l)
            val_bleu_log.append(bleu)
            val_loss_log_batches += l_log

        # sample model
        print("Sampling training data...")
        print()
        samples = sample(net, train_data, vocab, samples=3,
                         max_len=max_seq_len)
        for t, s in samples:
            print("Target:\t", t)
            print("Predicted:\t", s)
            print()

        # if val_caps:
        #     print("Sampling validation data...")
        #     print()
        #     samples = sample(net, val_data, vocab, samples=3,
        #                      max_len=max_seq_len)
        #     for t, s in samples:
        #         print("Target:\t", t)
        #         print("Predicted:\t", s)
        #         print()

        if val_caps:
            # If validation BLEU did not improve this epoch, wrap training.
            if prev_bleu > bleu and early_stopping:
                print("\nWrapping training after {0} epochs.\n".format(e))
                break
            prev_val_l = val_l
            prev_bleu = bleu

    # Experiment summary logs.
    tot_time = time.time() - training_start_time
    hours = tot_time // 3600
    mins = (tot_time % 3600) // 60
    secs = (tot_time % 60)
    print("Total training time:\t{0}:{1}:{2}".format(hours, mins, secs))
    print("Total training instances:\t", total_instances)
    print("Total training steps:\t", total_steps)
    print()

    _write_loss_log("train_loss_log.txt", out_dir, train_loss_log)
    _write_loss_log("train_loss_log_batches.txt", out_dir,
                    train_loss_log_batches)
    _write_loss_log("train_penalty.txt", out_dir, train_penalty_log)
    if val_caps:
        _write_loss_log("val_loss_log.txt", out_dir, val_loss_log)
        _write_loss_log("val_loss_log_batches.txt", out_dir,
                        val_loss_log_batches)
        _write_loss_log("val_bleu4_log.txt", out_dir, val_bleu_log)

    print("EXPERIMENT END ", time.asctime())