def train_model(model, inputs, outputs, partition, loss_f, eval_f, opt,
                epochs, args, model_name='model'):
    model.cuda()
    model_path = args.model_path + '/' + model_name

    train_inputs = inputs[partition[0]]
    train_outputs = outputs[partition[0]]
    eval_inputs = inputs[partition[1]]
    eval_outputs = outputs[partition[1]]
    train_data = TensorDataset(train_inputs, train_outputs)
    trainval_inputs = train_inputs
    trainval_outputs = train_outputs

    # Placeholder error term for the R^2 readout below
    data_error = torch.FloatTensor([1]).to(eval_inputs)

    losses = torch.zeros(args.epochs, 3)
    i_epoch = 0
    try:
        # Run train/eval loop over the specified number of epochs
        for i_epoch in range(args.epochs):
            # Increase batch size according to the specified schedule
            args.batch_size = int(args.batch_size + args.batch_size_annealing)
            if i_epoch < (args.epochs - epochs):
                continue

            # Prep data loader
            train_loader = DataLoader(train_data, batch_size=args.batch_size,
                                      shuffle=True, pin_memory=True)

            # Set model to training mode
            model.train()
            train_losses = 0

            # Batch training data
            for i_batch, batch in enumerate(train_loader):
                batch_inputs, batch_outputs = batch
                batch_inputs = get_sequences(batch_inputs, rand=True)
                # Targets are the (masked) input sequences themselves
                batch_outputs = batch_inputs

                # If the last batch is size 0, just skip it
                if batch_outputs.size(0) == 0:
                    continue

                # Perform gradient update on batch
                batch_losses = step(model, batch_inputs, batch_outputs,
                                    loss_f, opt, args.n_layers)
                train_losses += torch.sum(batch_losses).detach().cpu().item()
            train_losses = train_losses / train_inputs.size(0)

            # Set model to evaluation mode (turn off dropout and stuff)
            model.eval()
            n_batches_eval = min((eval_inputs.size(0) // args.batch_size), 10)
            sum_loss = 0

            # Batch the eval data
            i_shuffle = shuffle_data(eval_inputs, eval_outputs)
            for i_batch in range(n_batches_eval):
                batch_indices = slice(i_batch * args.batch_size,
                                      (i_batch + 1) * args.batch_size)
                batch_inputs = eval_inputs[i_shuffle[batch_indices]]
                batch_inputs = get_sequences(batch_inputs, rand=True)
                batch_outputs = batch_inputs

                # Same reasoning as training: sometimes encounter 0-size batches
                if batch_outputs.size(0) == 0:
                    continue

                # Don't need to track operations/gradients for evaluation
                with torch.no_grad():
                    # Build a sum of evaluation losses to average over later
                    predictions, _ = model(batch_inputs, args.n_layers)
                    predictions = predictions.permute(0, 2, 1)[batch_outputs < 4]
                    weighting = normpdf(batch_outputs.shape)[batch_outputs < 4]
                    sum_loss += torch.sum(
                        eval_f(predictions.squeeze(),
                               batch_outputs[batch_outputs < 4].squeeze())
                        * weighting).item()

            n_batches_trainval = min((trainval_inputs.size(0) // args.batch_size), 10)
            sum_loss2 = 0

            # Batch the train-validation data
            i_shuffle = shuffle_data(trainval_inputs, trainval_outputs)
            for i_batch in range(n_batches_trainval):
                batch_indices = slice(i_batch * args.batch_size,
                                      (i_batch + 1) * args.batch_size)
                batch_inputs = trainval_inputs[i_shuffle[batch_indices]]
                batch_inputs = get_sequences(batch_inputs, rand=True)
                batch_outputs = batch_inputs

                # Same reasoning as training: sometimes encounter 0-size batches
                if batch_outputs.size(0) == 0:
                    continue

                # Don't need to track operations/gradients for evaluation
                with torch.no_grad():
                    # Build a sum of evaluation losses to average over later
                    predictions, _ = model(batch_inputs, args.n_layers)
                    predictions = predictions.permute(0, 2, 1)[batch_outputs < 4]
                    weighting = normpdf(batch_outputs.shape)[batch_outputs < 4]
                    sum_loss2 += torch.sum(
                        eval_f(predictions.squeeze(),
                               batch_outputs[batch_outputs < 4].squeeze())
                        * weighting).item()

            # Calculate and print mean train and eval loss over the epoch
            mean_loss = sum_loss / (args.batch_size * n_batches_eval + 1)
            mean_loss2 = sum_loss2 / (args.batch_size * n_batches_trainval + 1)
            losses[i_epoch, 0] = train_losses
            losses[i_epoch, 1] = mean_loss
            losses[i_epoch, 2] = mean_loss2
            print('Epoch %d Mean Train / TrainVal / Eval Loss and R^2 Value: '
                  '%.3f / %.3f / %.3f / %.3f ' %
                  (i_epoch + 1, losses[i_epoch, 0], losses[i_epoch, 2],
                   losses[i_epoch, 1], 1 - (mean_loss / data_error).item()),
                  end='\r')
            if (i_epoch + 1) % args.save_rate == 0:
                save_model(model, opt, model_path + '_%d.ptm' % (i_epoch + 1))
        print('')  # to keep only the final epoch losses from each fold
        return model.cpu(), losses.cpu()
    except (Exception, KeyboardInterrupt) as e:
        save_model(model, opt, model_path + '_%d.ptm' % i_epoch)
        return e, losses.cpu()
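# A minimal usage sketch for train_model, assuming the helpers it calls
# (get_sequences, step, shuffle_data, normpdf, save_model) are defined
# elsewhere in this module. The Namespace fields, the 80/20 partition, and
# the loss choices below are illustrative placeholders, not the original
# configuration.
from argparse import Namespace

args = Namespace(model_path='./models', batch_size=32, batch_size_annealing=0,
                 epochs=10, n_layers=2, save_rate=5)
n = inputs.size(0)
perm = torch.randperm(n)
partition = [perm[:int(0.8 * n)], perm[int(0.8 * n):]]  # train / eval split
model, losses = train_model(model, inputs, outputs, partition,
                            loss_f=torch.nn.CrossEntropyLoss(reduction='none'),
                            eval_f=torch.nn.CrossEntropyLoss(reduction='none'),
                            opt=torch.optim.Adam(model.parameters()),
                            epochs=10, args=args)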
def load_bn_ggn(batch_size=128, dyn_type='table'):
    # file addresses
    series_address = './data/bn/mark-14771-series.pickle'
    adj_address = './data/bn/mark-14771-adjmat.pickle'
    # 5/7 for training, 1/7 for validation and 1/7 for test
    use_state = 1024

    # adjacency matrix
    with open(adj_address, 'rb') as f:
        edges = pickle.load(f, encoding='latin1')
    # time series data
    with open(series_address, 'rb') as f:
        info_train = pickle.load(f, encoding='latin1')

    # if too large...
    if info_train.shape[0] > 100000:
        info_train = info_train[:100000]

    info_train_list = info_train.tolist()
    has_loaded = []
    i = 0
    while len(has_loaded) < use_state:
        if dyn_type == 'table':
            # for 'table' dynamics, each state we load must be distinct
            if info_train_list[i] not in has_loaded:
                has_loaded.append(info_train_list[i])
            i = i + 2
        elif dyn_type == 'prob':
            # for 'prob' dynamics we don't require distinct states
            has_loaded.append(info_train_list[i])
            i = i + 2
        else:
            raise ValueError('Error in loading: unknown dyn_type %r' % dyn_type)
    info_train = info_train[:i + 2]

    # Pre-allocate the arrays we are about to fill
    data_x = np.zeros((int(info_train.shape[0] / 2), info_train.shape[1], 2))
    data_y = np.zeros((int(info_train.shape[0] / 2), info_train.shape[1]))

    # Preprocess into the one-hot / label format commonly used for classification
    for i in range(int(info_train.shape[0] / 2)):
        for j in range(info_train.shape[1]):
            if info_train[2 * i][j][0] == 0.:
                data_x[i][j] = [1, 0]
            else:
                data_x[i][j] = [0, 1]
            if info_train[2 * i + 1][j][0] == 0.:
                data_y[i][j] = 0
            else:
                data_y[i][j] = 1

    # random permutation
    indices = np.random.permutation(data_x.shape[0])
    data_x = data_x[indices]
    data_y = data_y[indices]

    # separate train, val and test sets: train / val / test == 5 / 1 / 1
    train_len = int(data_x.shape[0] * 5 / 7)
    val_len = int(data_x.shape[0] * 6 / 7)
    feat_train = data_x[:train_len]
    target_train = data_y[:train_len]
    feat_val = data_x[train_len:val_len]
    target_val = data_y[train_len:val_len]
    feat_test = data_x[val_len:]
    target_test = data_y[val_len:]

    # change to torch tensors
    feat_train = torch.DoubleTensor(feat_train)
    feat_val = torch.DoubleTensor(feat_val)
    feat_test = torch.DoubleTensor(feat_test)
    target_train = torch.LongTensor(target_train)
    target_val = torch.LongTensor(target_val)
    target_test = torch.LongTensor(target_test)

    # put into tensor datasets
    train_data = TensorDataset(feat_train, target_train)
    val_data = TensorDataset(feat_val, target_val)
    test_data = TensorDataset(feat_test, target_test)

    # put into dataloaders
    train_data_loader = DataLoader(train_data, batch_size=batch_size, drop_last=True)
    valid_data_loader = DataLoader(val_data, batch_size=batch_size, drop_last=True)
    test_data_loader = DataLoader(test_data, batch_size=batch_size, drop_last=True)
    return train_data_loader, valid_data_loader, test_data_loader, edges
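# Quick usage sketch, assuming the pickle files above exist on disk: pull one
# batch from the train loader and check shapes. The printed shapes follow
# from the construction above; nothing else is assumed.
train_loader, valid_loader, test_loader, edges = load_bn_ggn(batch_size=128,
                                                             dyn_type='table')
batch_x, batch_y = next(iter(train_loader))
print(batch_x.shape, batch_y.shape)  # (128, num_nodes, 2), (128, num_nodes)
print(np.asarray(edges).shape)       # adjacency matrix of the Boolean network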
def train_TextCNN(subject):
    print('Reading Data')
    root = roots[subject]
    dataset = build_dataset(root)
    num_topics = len(dataset['label'].unique())
    common_texts = dataset['item'].tolist()

    print('Cleaning Data')
    common_texts, word2id, valid_words = filter_pad_words(common_texts, max_feature)
    id2word = dict(zip(word2id.values(), word2id.keys()))
    origin_texts = [[id2word[ind] for ind in sentence] for sentence in common_texts]

    print('Training Word2Vec')
    model = Word2Vec(origin_texts,
                     size=embedding_size,
                     min_count=1,  # this min_count is also used to select words in utils.clean_sentence
                     workers=3,
                     window=5,
                     iter=3)

    print('Feeding weights')
    fixed = np.zeros((len(word2id), embedding_size))
    for word, ind in word2id.items():
        fixed[ind] = np.array(model.wv[word])
    fixed = torch.from_numpy(fixed).float()
    Network = TextCNN(fixed, window_size_list, len(word2id), num_topics,
                      len(word2id) - 1, dropout_rate, embedding_size).to(device)
    optimizer = optim.Adam(Network.parameters(), lr_schedule[0])

    print('Creating training/testing set')
    label2id = dict(zip(dataset['label'].unique(), range(num_topics)))
    id2label = dict(zip(label2id.values(), label2id.keys()))
    X = np.array(common_texts)
    y = np.array([label2id[label] for label in dataset['label']]).reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=101)
    X_train = torch.tensor(X_train).long()
    y_train = torch.tensor(y_train).long()
    X_test = torch.tensor(X_test).long()
    y_test = torch.tensor(y_test).long()
    train = TensorDataset(X_train, y_train)
    test = TensorDataset(X_test, y_test)
    train_loader = DataLoader(train, 64, True)
    test_loader = DataLoader(test, 64, False)

    print('Training\n')
    criterion = nn.NLLLoss()
    Network.train()
    for i in range(1, epoch + 1):
        log = []
        for X_sample, y_sample in iter(train_loader):
            X_sample = X_sample.to(device)
            y_sample = y_sample.view(-1).to(device)
            logits = Network(X_sample)
            loss = criterion(logits, y_sample)
            log.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('Epoch {}. Average loss {:.4f}'.format(i, np.mean(log)))
        if i in lr_schedule:
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_schedule[i]

    print('\nTesting\n')
    predictions = []
    Network.eval()
    with torch.no_grad():
        for X_sample, _ in iter(test_loader):
            X_sample = X_sample.to(device)
            logits = Network(X_sample)
            _, index = logits.topk(1, 1)
            index = index.view(-1).cpu().numpy().tolist()
            predictions += index
    y_test = y_test.reshape(-1).tolist()
    y_test = [id2label[ind] for ind in y_test]
    predictions = [id2label[ind] for ind in predictions]
    print('\nTest result for {} :'.format(subject))
    print(classification_report(y_test, predictions))
    # Return the trained network instance, not the class
    return Network
BATCH_SIZE = 64
NUM_EPOCHS = 100
save_name = "/data/maren_semantic_analysis/E2E/{0}_{1}_hidden_model.pt".format(
    model_save_name, len(LAYER_CONFIG))

model = FeedForwardNN_multipleLayers(NUM_LABELS, VOCAB_SIZE,
                                     LAYER_CONFIG).to(device)  # dropout=0.0

if train:
    dataset = TensorDataset(train_data_vectorized,
                            torch.from_numpy(np.array(y_train)).type(torch.long))
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE)
    loss_function = nn.NLLLoss()
    optimizer = optim.Adadelta(model.parameters(), weight_decay=1e-4)
    print("Training ...")
    for epoch in range(NUM_EPOCHS):
        total_loss = 0.0
        for i, data in enumerate(dataloader):
            model.zero_grad()
            inputs, labels_train = data
            log_probs = model(inputs.to(device))
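            # Hedged continuation sketch: the snippet above is truncated here;
            # a typical rest of this NLLLoss loop would look as follows (the
            # label device move and the final save step are assumptions, not
            # the original code).
            loss = loss_function(log_probs, labels_train.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print("Epoch {}: loss {:.4f}".format(epoch, total_loss / len(dataloader)))
    torch.save(model.state_dict(), save_name)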
# -*- encoding: utf-8 -*-
from HandwrittenDigitRecognition.data_analysis import load_data
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import TensorDataset
from torch.autograd import Variable
from torch import load, from_numpy, max, argmax
from torch.nn import CrossEntropyLoss
import matplotlib.pyplot as plt

BATCH_SIZE = 64  # samples per batch

data_set = load_data()  # load the test-set data
# TensorDataset is a subclass of torch.utils.data.Dataset
test_dataset = TensorDataset(from_numpy(data_set[2]), from_numpy(data_set[3]))
# the dataset argument must be a subclass of torch.utils.data.Dataset
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                         shuffle=False)

# load the model from file
cnn = load('cnn.pkl')
# define the loss function
criterion = CrossEntropyLoss()  # cross-entropy loss
# switch the model to evaluation mode, which affects BatchNorm
cnn.eval()
eval_loss = 0
eval_acc = 0
for images, labels in test_loader:
    images = images.unsqueeze(1)  # add the channel dimension
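    # Hedged continuation sketch: the original snippet is truncated at the
    # line above; a typical rest of this evaluation loop would be the
    # following (the float cast and the accuracy bookkeeping are assumptions,
    # not the source).
    images = Variable(images.float())
    labels = Variable(labels.long())
    outputs = cnn(images)
    loss = criterion(outputs, labels)
    eval_loss += loss.item() * labels.size(0)
    eval_acc += (argmax(outputs, 1) == labels).sum().item()
print('Test Loss: {:.6f}, Acc: {:.6f}'.format(
    eval_loss / len(test_dataset), eval_acc / len(test_dataset)))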
def get_H_config(self, dataset, will_train=True):
    print("Preparing training D1+D2 (H)")
    print("Mixture size: %s" % colored('%d' % len(dataset), 'green'))

    # 80%, 20% for local train+test
    train_ds, valid_ds = dataset.split_dataset(0.8)

    if self.args.D1 in Global.mirror_augment:
        print(colored("Mirror augmenting %s" % self.args.D1, 'green'))
        new_train_ds = train_ds + MirroredDataset(train_ds)
        train_ds = new_train_ds

    # Initialize the multi-threaded loaders.
    train_loader = DataLoader(train_ds, batch_size=self.args.batch_size,
                              shuffle=True, num_workers=self.args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_ds, batch_size=self.args.batch_size,
                              shuffle=True, num_workers=self.args.workers,
                              pin_memory=True)

    # Set up the criterion; margin must be non-zero.
    criterion = SVMLoss(margin=1.0).cuda()

    # Set up the model
    model = ScoreSVMModelWrapper(self.base_model).cuda()

    old_valid_loader = valid_loader
    if will_train:
        # cache the subnetwork for faster optimization.
        from methods import get_cached
        from torch.utils.data.dataset import TensorDataset

        trainX, trainY = get_cached(model, train_loader, self.args.device)
        validX, validY = get_cached(model, valid_loader, self.args.device)

        new_train_ds = TensorDataset(trainX, trainY)
        new_valid_ds = TensorDataset(validX, validY)

        # Initialize the new multi-threaded loaders.
        train_loader = DataLoader(new_train_ds, batch_size=2048, shuffle=True,
                                  num_workers=0, pin_memory=False)
        valid_loader = DataLoader(new_valid_ds, batch_size=2048, shuffle=True,
                                  num_workers=0, pin_memory=False)

        # Set model to direct evaluation (for cached data)
        model.set_eval_direct(True)

    # Set up the config
    config = IterativeTrainerConfig()

    base_model_name = self.base_model.__class__.__name__
    if hasattr(self.base_model, 'preferred_name'):
        base_model_name = self.base_model.preferred_name()

    config.name = '_%s[%s](%s->%s)' % (self.__class__.__name__,
                                       base_model_name,
                                       self.args.D1, self.args.D2)
    config.train_loader = train_loader
    config.valid_loader = valid_loader
    config.phases = {
        'train': {'dataset': train_loader, 'backward': True},
        'test': {'dataset': valid_loader, 'backward': False},
        'testU': {'dataset': old_valid_loader, 'backward': False},
    }
    config.criterion = criterion
    config.classification = True
    config.cast_float_label = True
    config.stochastic_gradient = True
    config.visualize = not self.args.no_visualize
    config.model = model
    config.optim = optim.Adagrad(model.H.parameters(), lr=1e-1,
                                 weight_decay=1.0 / len(train_ds))
    config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(config.optim,
                                                            patience=10,
                                                            threshold=1e-1,
                                                            min_lr=1e-8,
                                                            factor=0.1,
                                                            verbose=True)
    config.logger = Logger()
    config.max_epoch = 100

    return config
def train(appliance_name, model, mains, appliance, epochs, batch_size,
          pretrain, checkpoint_interval=None, train_patience=3):
    # Model configuration
    if USE_CUDA:
        model = model.cuda()
    if not pretrain:
        model.apply(initialize)
    summary(model, (1, mains.shape[1]))

    # Split the train and validation set
    train_mains, valid_mains, train_appliance, valid_appliance = train_test_split(
        mains, appliance, test_size=.2, random_state=random_seed)

    # Create optimizer, loss function, and dataloaders
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss(reduction='mean')

    train_dataset = TensorDataset(
        torch.from_numpy(train_mains).float().permute(0, 2, 1),
        torch.from_numpy(train_appliance).float().permute(0, 2, 1))
    train_loader = tud.DataLoader(train_dataset, batch_size=batch_size,
                                  shuffle=True, num_workers=0, drop_last=True)
    valid_dataset = TensorDataset(
        torch.from_numpy(valid_mains).float().permute(0, 2, 1),
        torch.from_numpy(valid_appliance).float().permute(0, 2, 1))
    valid_loader = tud.DataLoader(valid_dataset, batch_size=batch_size,
                                  shuffle=True, num_workers=0, drop_last=True)

    writer = SummaryWriter(comment='train_visual')
    patience, best_loss = 0, None

    for epoch in range(epochs):
        # Early stopping
        if patience == train_patience:
            print("val_loss did not improve for {} epochs, stopping early".format(train_patience))
            break

        # Train the model
        st = time.time()
        model.train()
        for i, (batch_mains, batch_appliance) in enumerate(train_loader):
            if USE_CUDA:
                batch_mains = batch_mains.cuda()
                batch_appliance = batch_appliance.cuda()
            batch_pred = model(batch_mains)
            loss = loss_fn(batch_pred, batch_appliance)
            model.zero_grad()
            loss.backward()
            optimizer.step()
        ed = time.time()

        # Evaluate the model
        model.eval()
        with torch.no_grad():
            cnt, loss_sum = 0, 0
            for i, (batch_mains, batch_appliance) in enumerate(valid_loader):
                if USE_CUDA:
                    batch_mains = batch_mains.cuda()
                    batch_appliance = batch_appliance.cuda()
                batch_pred = model(batch_mains)
                loss = loss_fn(batch_appliance, batch_pred)
                loss_sum += loss
                cnt += 1
        final_loss = loss_sum / cnt

        # Save best only
        if best_loss is None or final_loss < best_loss:
            best_loss = final_loss
            patience = 0
            net_state_dict = model.state_dict()
            path_state_dict = "./" + appliance_name + "_dae_best_state_dict.pt"
            torch.save(net_state_dict, path_state_dict)
        else:
            patience = patience + 1
        print("Epoch: {}, Valid_Loss: {}, Time consumption: {}s.".format(
            epoch, final_loss, ed - st))

        # For the visualization of the training process
        for name, param in model.named_parameters():
            writer.add_histogram(name + '_grad', param.grad, epoch)
            writer.add_histogram(name + '_data', param, epoch)
        writer.add_scalars("MSELoss", {"Valid": final_loss}, epoch)

        # Save checkpoint
        if (checkpoint_interval is not None) and ((epoch + 1) % checkpoint_interval == 0):
            checkpoint = {"model_state_dict": model.state_dict(),
                          "optimizer_state_dict": optimizer.state_dict(),
                          "epoch": epoch}
            path_checkpoint = "./" + appliance_name + "_dae_checkpoint_{}_epoch.pt".format(epoch)
            torch.save(checkpoint, path_checkpoint)
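# Hedged follow-up sketch: restoring the best checkpoint saved above for
# inference. The path mirrors the function's own naming convention;
# appliance_name and model are assumed to be in scope.
state_dict = torch.load("./" + appliance_name + "_dae_best_state_dict.pt")
model.load_state_dict(state_dict)
model.eval()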
print(f'Using {device}')
model = Autoencoder().to(device)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

dataset_size = features.shape[0]
train_size = int(dataset_size * 0.95)
test_size = dataset_size - train_size
batch_size = 256

train_tensor = torch.Tensor(features[:train_size])
test_tensor = torch.Tensor(features[train_size:])
train_dataloader = DataLoader(TensorDataset(train_tensor, train_tensor),
                              batch_size=batch_size)
test_dataloader = DataLoader(TensorDataset(test_tensor, test_tensor),
                             batch_size=64)

epochs = 200
start = time()
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_function, optimizer, device)
    scheduler.step()
    test(test_dataloader, model, loss_function)
torch.save(model.state_dict(), './model' + get_date())
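# Hedged sketch of the train/test helpers invoked above, since they are not
# shown in this snippet. This is one plausible implementation for an
# autoencoder whose inputs double as targets (note the TensorDataset pairs
# each tensor with itself), not the original code.
def train(dataloader, model, loss_fn, opt, device):
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        loss = loss_fn(model(X), y)
        opt.zero_grad()
        loss.backward()
        opt.step()

def test(dataloader, model, loss_fn):
    model.eval()
    total = 0.0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            total += loss_fn(model(X), y).item()
    print(f"Avg test loss: {total / len(dataloader):.6f}")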
if device.type == "cuda": for state in optimizer.state.values(): for k, t in state.items(): if torch.is_tensor(t): state[k] = t.cuda() if args.data: dataset = torch.load(args.data, map_location = device) else: dataset = {"inputs":[], "states":[], "targets":[]} if args.replay: targs = torch.cat(data["targets"]) states = torch.cat(data["states"], dim = 1) states = torch.transpose(states, 0,1) inputs = torch.cat(data["inputs"]).squeeze(1) tensor_data = TensorDataset(inps, states, targs) replay_train(j, optimizer, err, device = device, data = tensor_data) else: for epoch in range(args.e): for opp_round in range(args.n): #choose the opponent and random hyperparameters opps = ["const", "rand", "copy", "exp3r", "ucb", "unif", "bayes","rnn"] opp = choice(opps) if opp == "rand": reset_time = randint(2,100) b = 0.8*torch.rand(1).item() j_op = randJanken(bias = b, reset_prob = 1/reset_time)
    img = np.reshape(d[b'data'], (-1, 3, 32, 32))
    labels = d[b'labels']
    train_data = np.concatenate((train_data, img))
    train_label = np.concatenate((train_label, labels))

with open('cifar-10-batches-py/test_batch', 'rb') as f:
    d = pickle.load(f, encoding='bytes')
test_data = np.reshape(d[b'data'], (-1, 3, 32, 32))
test_label = d[b'labels']

train_data_normalized = torch.FloatTensor(train_data / 256.0)
train_label = torch.LongTensor(train_label)
test_data_normalized = torch.FloatTensor(test_data / 256.0)
test_label = torch.LongTensor(test_label)

train_dataset = TensorDataset(train_data_normalized, train_label)
test_dataset = TensorDataset(test_data_normalized, test_label)

# Data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Neural network
def mse_crossentropy_loss(value, target):
    # value: [batch, N]
        self.linear1 = nn.Linear(1, 5)
        self.linear2 = nn.Linear(5, 1)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        return x


def generate_data(size):
    x = np.random.uniform(size=(size, 1))
    y = x * 2.0
    return torch.FloatTensor(x), torch.FloatTensor(y)


train_x, train_y = generate_data(1000)
val_x, val_y = generate_data(100)
train_ds = TensorDataset(train_x, train_y)
val_ds = TensorDataset(val_x, val_y)
train_dl = DataLoader(train_ds, batch_size=8)
val_dl = DataLoader(val_ds, batch_size=8)

data_bunch = DataBunch(train_dl, val_dl)
model = handpose_model()
learn = Learner(data_bunch, model, loss_func=F.mse_loss)
learn.fit_one_cycle(1)
def prepare_data_and_tokenize(args, tokenizer):
    # save np.load
    np_load_old = np.load
    # modify the default parameters of np.load
    np.load = lambda *a, **k: np_load_old(*a, allow_pickle=True, **k)

    logger.debug('loading files as numpy array')
    train_sentences = np.load("{}/train_sentences.npy".format(args.train))
    train_labels = np.load("{}/train_labels.npy".format(args.train))
    test_sentences = np.load("{}/test_sentences.npy".format(args.test))
    test_labels = np.load("{}/test_labels.npy".format(args.test))

    # restore np.load for future normal usage
    np.load = np_load_old

    # tokenize and convert
    train_input_ids = []
    train_attention_masks = []
    test_input_ids = []
    test_attention_masks = []

    logger.debug('starting with tokenizing the training sentences')
    for t in train_sentences:
        encoded_dict = tokenizer.encode_plus(
            t,
            add_special_tokens=True,
            max_length=50,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        train_input_ids.append(encoded_dict['input_ids'])
        train_attention_masks.append(encoded_dict['attention_mask'])
    logger.debug('finished with tokenizing the training sentences')

    logger.debug('starting with tokenizing the test sentences')
    for t in test_sentences:
        encoded_dict = tokenizer.encode_plus(
            t,
            add_special_tokens=True,
            max_length=50,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        test_input_ids.append(encoded_dict['input_ids'])
        test_attention_masks.append(encoded_dict['attention_mask'])
    logger.debug('finished with tokenizing the test sentences')

    train_input_ids = torch.cat(train_input_ids, dim=0)
    train_attention_mask = torch.cat(train_attention_masks, dim=0)
    train_labels = torch.tensor(train_labels)
    test_input_ids = torch.cat(test_input_ids, dim=0)
    test_attention_mask = torch.cat(test_attention_masks, dim=0)
    test_labels = torch.tensor(test_labels)

    logger.debug('Original: {}'.format(train_sentences[0]))
    logger.debug('Token IDs: {}'.format(train_input_ids[0]))

    train_dataset = TensorDataset(train_input_ids, train_attention_mask,
                                  train_labels)
    test_dataset = TensorDataset(test_input_ids, test_attention_mask,
                                 test_labels)
    return (train_dataset, test_dataset)
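# Hedged usage sketch: wrapping the returned TensorDatasets in DataLoaders,
# the usual next step for transformer fine-tuning. The samplers and the
# batch size are conventional choices, not taken from the original code.
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

train_dataset, test_dataset = prepare_data_and_tokenize(args, tokenizer)
train_loader = DataLoader(train_dataset,
                          sampler=RandomSampler(train_dataset),
                          batch_size=32)
test_loader = DataLoader(test_dataset,
                         sampler=SequentialSampler(test_dataset),
                         batch_size=32)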
def main(model_file):
    run_id = datetime.now().strftime('%Y%m%d_%H%M_finetune')
    output_path = os.path.join('output', run_id)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    print('The ID of this run: ' + run_id)
    print('Output directory: ' + output_path)

    all_people = os.listdir(config.test_dataset)
    people_count = len(all_people)
    print('People count: %d' % people_count)

    for i, person in enumerate(all_people):
        print('Progress: %d/%d' % (i, people_count))

        # T training images should come from the same video
        xx = sample_frames(person, config.finetune_T)
        person_t = person
        while person_t == person:
            (person_t, ) = random.sample(all_people, 1)
        xx_t = sample_frames(person_t, 1)
        xx_all = xx_t + xx

        x = list()
        y = list()
        detector = get_detector('cuda')
        for filename in xx_all:
            img = Image.open(filename).convert('RGB')
            img = img.resize((config.input_size, config.input_size),
                             Image.LANCZOS)
            x.append(to_tensor(img, config.input_normalize))
            arr = np.array(img)
            landmarks = extract_landmark(detector, arr)
            rendered = plot_landmarks(config.input_size, landmarks)
            y.append(to_tensor(rendered, config.input_normalize))
        del detector

        torch.set_grad_enabled(True)

        x_t = torch.unsqueeze(x[0], dim=0)
        y_t = torch.unsqueeze(y[0], dim=0)
        y_t = y_t.cuda()
        x = torch.stack(x[1:])  # n * c * h * w
        y = torch.stack(y[1:])

        # sanity check
        assert x.size(0) == config.finetune_T

        # load models
        save_data = torch.load(model_file)
        _, _, _, G_state_dict, E_state_dict, D_state_dict = save_data[:6]

        G = Generator(config.G_config, config.input_normalize)
        G = G.eval()
        G = G.cuda()
        E = Embedder(config.E_config, config.embedding_dim)
        E = E.eval()
        E = E.cuda()
        D = Discriminator(config.V_config, config.embedding_dim)
        D = D.eval()
        D = D.cuda()

        with torch.no_grad():
            E.load_state_dict(E_state_dict)
            E_input = torch.cat((x, y), dim=1)
            E_input = E_input.cuda()
            e_hat = E(E_input)
            e_hat = e_hat.view(1, -1, config.embedding_dim)
            e_hat_mean = torch.mean(e_hat, dim=1, keepdim=False)
            del E

            P = G_state_dict['P.weight']
            adain = torch.matmul(e_hat_mean, torch.transpose(P, 0, 1))
            del G_state_dict['P.weight']
            adain = adain.view(1, -1, 2)
            assert adain.size(1) == G.adain_param_count
            G_state_dict['adain'] = adain.data
            G.load_state_dict(G_state_dict)

            del D_state_dict['embedding.weight']
            w0 = D_state_dict['w0']
            w = w0 + e_hat_mean
            del D_state_dict['w0']
            D_state_dict['w'] = w.data
            D.load_state_dict(D_state_dict)

            x_hat_0 = G(y_t)
            x_hat_0_img = to_pil_image(x_hat_0, config.input_normalize)
            del x_hat_0

        G = G.train()
        set_grad_enabled(G, True)
        D = D.train()
        set_grad_enabled(D, True)

        # loss
        L_EG = Loss_EG_finetune(config.vgg19_layers, config.vggface_layers,
                                config.vgg19_weight_file,
                                config.vggface_weight_file,
                                config.vgg19_loss_weight,
                                config.vggface_loss_weight,
                                config.fm_loss_weight, config.input_normalize)
        L_EG = L_EG.eval()
        L_EG = L_EG.cuda()
        set_grad_enabled(L_EG, False)

        optim_EG = optim.Adam(G.parameters(), lr=config.lr_EG,
                              betas=config.adam_betas)
        optim_D = optim.Adam(D.parameters(), lr=config.lr_D,
                             betas=config.adam_betas)

        # dataset
        dataset = TensorDataset(x, y)
        dataloader = DataLoader(dataset,
                                batch_size=config.finetune_batch_size,
                                shuffle=config.dataset_shuffle,
                                num_workers=0, pin_memory=True,
                                drop_last=False)

        # finetune
        for epoch in range(config.finetune_epoch):
            for _, (xx, yy) in enumerate(dataloader):
                xx = xx.cuda()
                yy = yy.cuda()

                optim_EG.zero_grad()
                optim_D.zero_grad()

                x_hat = G(yy)
                d_output = D(torch.cat((xx, yy), dim=1))
                d_output_hat = D(torch.cat((x_hat, yy), dim=1))
                d_features = d_output[:-1]
                d_features_hat = d_output_hat[:-1]
                d_score = d_output[-1]
                d_score_hat = d_output_hat[-1]

                l_eg, l_vgg19, l_vggface, l_cnt, l_adv, l_fm = \
                    L_EG(xx, x_hat, d_features, d_features_hat, d_score_hat)
                l_d = Loss_DSC(d_score_hat, d_score)
                loss = l_eg + l_d
                loss.backward()
                optim_EG.step()
                optim_D.step()

                # train D again
                optim_D.zero_grad()
                x_hat = x_hat.detach()  # do not need to train the generator
                d_output = D(torch.cat((xx, yy), dim=1))
                d_output_hat = D(torch.cat((x_hat, yy), dim=1))
                d_score = d_output[-1]
                d_score_hat = d_output_hat[-1]
                l_d2 = Loss_DSC(d_score_hat, d_score)
                l_d2.backward()
                optim_D.step()

        # after finetuning
        with torch.no_grad():
            x_hat_1 = G(y_t)
            x_hat_1_img = to_pil_image(x_hat_1, config.input_normalize)
            del x_hat_1

        # save images
        training_img = Image.new(
            'RGB', (config.finetune_T * config.input_size, config.input_size))
        for j in range(config.finetune_T):
            img = to_pil_image(x[j], config.input_normalize)
            training_img.paste(img, (j * config.input_size, 0))
        training_img.save(os.path.join(output_path, 't_%d.jpg' % i))

        x_t_img = to_pil_image(x_t, config.input_normalize)
        y_t_img = to_pil_image(y_t, config.input_normalize)
        output_img = Image.new('RGB',
                               (4 * config.input_size, config.input_size))
        output_img.paste(x_hat_0_img, (0, 0))
        output_img.paste(x_hat_1_img, (config.input_size, 0))
        output_img.paste(x_t_img, (2 * config.input_size, 0))
        output_img.paste(y_t_img, (3 * config.input_size, 0))
        output_img.save(os.path.join(output_path, 'o_%d.jpg' % i))
def load_data(batch_size=1, suffix=''):
    loc_train = np.load('data/loc_train' + suffix + '.npy')
    vel_train = np.load('data/vel_train' + suffix + '.npy')
    edges_train = np.load('data/edges_train' + suffix + '.npy')

    loc_valid = np.load('data/loc_valid' + suffix + '.npy')
    vel_valid = np.load('data/vel_valid' + suffix + '.npy')
    edges_valid = np.load('data/edges_valid' + suffix + '.npy')

    loc_test = np.load('data/loc_test' + suffix + '.npy')
    vel_test = np.load('data/vel_test' + suffix + '.npy')
    edges_test = np.load('data/edges_test' + suffix + '.npy')

    # [num_samples, num_timesteps, num_dims, num_atoms]
    num_atoms = loc_train.shape[3]

    loc_max = loc_train.max()
    loc_min = loc_train.min()
    vel_max = vel_train.max()
    vel_min = vel_train.min()

    # Normalize to [-1, 1]
    loc_train = (loc_train - loc_min) * 2 / (loc_max - loc_min) - 1
    vel_train = (vel_train - vel_min) * 2 / (vel_max - vel_min) - 1
    loc_valid = (loc_valid - loc_min) * 2 / (loc_max - loc_min) - 1
    vel_valid = (vel_valid - vel_min) * 2 / (vel_max - vel_min) - 1
    loc_test = (loc_test - loc_min) * 2 / (loc_max - loc_min) - 1
    vel_test = (vel_test - vel_min) * 2 / (vel_max - vel_min) - 1

    # Reshape to: [num_sims, num_atoms, num_timesteps, num_dims]
    loc_train = np.transpose(loc_train, [0, 3, 1, 2])
    vel_train = np.transpose(vel_train, [0, 3, 1, 2])
    feat_train = np.concatenate([loc_train, vel_train], axis=3)
    edges_train = np.reshape(edges_train, [-1, num_atoms**2])
    edges_train = np.array((edges_train + 1) / 2, dtype=np.int64)

    loc_valid = np.transpose(loc_valid, [0, 3, 1, 2])
    vel_valid = np.transpose(vel_valid, [0, 3, 1, 2])
    feat_valid = np.concatenate([loc_valid, vel_valid], axis=3)
    edges_valid = np.reshape(edges_valid, [-1, num_atoms**2])
    edges_valid = np.array((edges_valid + 1) / 2, dtype=np.int64)

    loc_test = np.transpose(loc_test, [0, 3, 1, 2])
    vel_test = np.transpose(vel_test, [0, 3, 1, 2])
    feat_test = np.concatenate([loc_test, vel_test], axis=3)
    edges_test = np.reshape(edges_test, [-1, num_atoms**2])
    edges_test = np.array((edges_test + 1) / 2, dtype=np.int64)

    feat_train = torch.FloatTensor(feat_train)
    edges_train = torch.LongTensor(edges_train)
    feat_valid = torch.FloatTensor(feat_valid)
    edges_valid = torch.LongTensor(edges_valid)
    feat_test = torch.FloatTensor(feat_test)
    edges_test = torch.LongTensor(edges_test)

    # Exclude self edges
    off_diag_idx = np.ravel_multi_index(
        np.where(np.ones((num_atoms, num_atoms)) - np.eye(num_atoms)),
        [num_atoms, num_atoms])
    edges_train = edges_train[:, off_diag_idx]
    edges_valid = edges_valid[:, off_diag_idx]
    edges_test = edges_test[:, off_diag_idx]

    train_data = TensorDataset(feat_train, edges_train)
    valid_data = TensorDataset(feat_valid, edges_valid)
    test_data = TensorDataset(feat_test, edges_test)

    train_data_loader = DataLoader(train_data, batch_size=batch_size)
    valid_data_loader = DataLoader(valid_data, batch_size=batch_size)
    test_data_loader = DataLoader(test_data, batch_size=batch_size)

    return train_data_loader, valid_data_loader, test_data_loader, \
        loc_max, loc_min, vel_max, vel_min
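# Hedged usage sketch: pull one batch and undo the [-1, 1] normalization with
# the returned statistics. The suffix and the 2-D slice (num_dims == 2) are
# assumptions about the simulation data, not something the loader enforces.
train_loader, valid_loader, test_loader, loc_max, loc_min, vel_max, vel_min = \
    load_data(batch_size=16, suffix='_springs5')
feats, edges = next(iter(train_loader))
# feats: [batch, num_atoms, num_timesteps, 2 * num_dims], loc before vel
loc = (feats[..., :2] + 1) * (loc_max - loc_min) / 2 + loc_min
print(loc.shape, edges.shape)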
}
inf_morph_samples = {
    t: torch.tensor(inf_morph_samples[t][0], dtype=torch.long)
    for t in inf_morph_samples
}
uninf_analysis_lengths = {
    t: torch.tensor(uninf_morph_samples[t][1], dtype=torch.long)
    for t in uninf_morph_samples
}
uninf_morph_samples = {
    t: torch.tensor(uninf_morph_samples[t][0], dtype=torch.long)
    for t in uninf_morph_samples
}

inf_dev_set = TensorDataset(*[
    s['dev'] for s in [token_samples, token_lengths, inf_morph_samples,
                       inf_analysis_lengths, inf_morph_samples]
])
inf_test_set = TensorDataset(*[
    s['test'] for s in [token_samples, token_lengths, inf_morph_samples,
                        inf_analysis_lengths, inf_morph_samples]
])
inf_train_set = TensorDataset(*[
    s['train'] for s in [token_samples, token_lengths, inf_morph_samples,
                         inf_analysis_lengths, inf_morph_samples]
])
uninf_dev_set = TensorDataset(*[
def idetect_col(self, dataset, col, pos_indices, neg_indices):
    generator = ErrorGenerator()
    start_time = time.time()
    self.training = True

    logger.info("Transforming data ....")
    train_dirty_df, train_label_df, rules = generator.fit_transform(
        dataset, col, pos_indices, neg_indices)
    logger.info(f"Total transformation time: {time.time() - start_time}")

    feature_tensors_with_labels = self.extract_features(train_dirty_df,
                                                        train_label_df, col)

    start_time = time.time()
    self.model = LSTMModel(self.hparams.model)
    train_data = TensorDataset(*feature_tensors_with_labels)
    train_dataloader, _, _ = split_train_test_dls(
        train_data,
        unzip_and_stack_tensors,
        self.hparams.model.batch_size,
        ratios=[1.0, 0.0],
        num_workers=0,
        pin_memory=False,
    )

    # Scale the epoch count so roughly 50k examples are seen, with a floor of 5
    num_epochs = max(5, 50000 // len(train_data))
    print(f"Training for {num_epochs} epochs")

    self.model.train()
    if len(train_dataloader) > 0:
        os.environ["MASTER_PORT"] = str(random.randint(49152, 65535))
        trainer = Trainer(
            gpus=self.hparams.gpus,
            accelerator="dp",
            max_epochs=num_epochs,
            checkpoint_callback=False,
            logger=False,
        )
        trainer.fit(self.model, train_dataloader=train_dataloader)
    logger.info(f"Total training time: {time.time() - start_time}")

    start_time = time.time()
    self.model.eval()
    self.training = False
    feature_tensors = self.extract_features(dataset.dirty_df, None, col)
    pred = self.model.forward(*feature_tensors)
    result = pred.squeeze().detach().cpu().numpy()
    logger.info(f"Total prediction time: {time.time() - start_time}")
    return result
def fit(self, train_df, batch_size=256, group_size=10, lr=1E-2, iter_mult=500,
        rank_lambda=1.0, test_df=None, train_dir='.'):
    split_ratio = 0.05
    train_df, valid_df, _ = split_df_by_op(train_df, seed=100, ratio=split_ratio)
    if test_df is not None:
        test_group_indices = get_group_indices(test_df)

    features, labels = get_feature_label(train_df)
    valid_features, valid_labels = get_feature_label(valid_df)
    if test_df is not None:
        test_features, test_labels = get_feature_label(test_df)

    epoch_iters = (len(features) + batch_size - 1) // batch_size
    log_interval = epoch_iters * iter_mult // 500
    num_iters = epoch_iters * iter_mult

    if self.net is None:
        self._in_units = features.shape[1]
        self.net = RankingModel(in_units=features.shape[1], units=self._units,
                                num_layers=self._num_layers,
                                dropout=self._dropout,
                                use_gate=self._use_gate,
                                act_type=self._act_type)
    self.net.cuda()
    self.net.train()

    non_invalid_labels = labels[labels > 0]
    if self._mean_val is None:
        mean_val = non_invalid_labels.mean()
        std_val = non_invalid_labels.std()
        self._mean_val = mean_val
        self._std_val = std_val
    else:
        mean_val = self._mean_val
        std_val = self._std_val

    th_features = th.tensor(features, dtype=th.float32)
    th_labels = th.tensor(labels, dtype=th.float32)
    dataset = TensorDataset(th_features, th_labels)
    if self._rank_loss_fn == 'no_rank':
        batch_sampler = RegressionSampler(
            thrpt=labels, regression_batch_size=batch_size * group_size)
    else:
        batch_sampler = RankGroupSampler(
            thrpt=labels, rank_batch_size=batch_size, group_size=group_size,
            beta_params=self._beta_distribution)
    dataloader = DataLoader(dataset, batch_sampler=batch_sampler,
                            num_workers=8)

    optimizer = torch.optim.Adam(self.net.parameters(), lr=lr, amsgrad=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer, T_max=num_iters, eta_min=1E-5)
    if self._rank_loss_fn != 'no_rank':
        rank_loss_fn = get_ranking_loss(self._rank_loss_fn)

    dataloader = iter(dataloader)
    log_regression_loss = 0
    log_ranking_loss = 0
    log_cnt = 0
    niter = 0
    epoch_iter = 0
    best_valid_rmse = np.inf
    no_better = 0
    stop_patience = 50

    for ranking_features, ranking_labels in dataloader:
        optimizer.zero_grad()
        ranking_features = ranking_features.cuda()
        ranking_labels = ranking_labels.cuda()
        if self._rank_loss_fn != 'no_rank':
            ranking_labels = ranking_labels.reshape((batch_size, group_size))
            original_ranking_labels = ranking_labels
            ranking_labels = (ranking_labels - mean_val) / std_val
            ranking_scores = self.net(ranking_features)
            ranking_scores = ranking_scores.reshape((batch_size, group_size))
            loss_regression = torch.square(ranking_scores - ranking_labels).mean()
            loss_ranking = rank_loss_fn(y_pred=ranking_scores,
                                        y_true=original_ranking_labels / std_val)
            loss = loss_regression + rank_lambda * loss_ranking
        else:
            ranking_labels = (ranking_labels - mean_val) / std_val
            ranking_scores = self.net(ranking_features)
            loss_regression = torch.square(ranking_scores - ranking_labels).mean()
            loss = loss_regression
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        with torch.no_grad():
            log_regression_loss += loss_regression
            if self._rank_loss_fn != 'no_rank':
                log_ranking_loss += loss_ranking
        log_cnt += 1

        if log_cnt >= log_interval:
            logging.info('[{}/{}] Regression Loss = {:.4f}, Ranking Loss = {:.4f}'
                         .format(niter + 1, num_iters,
                                 log_regression_loss / log_cnt,
                                 log_ranking_loss / log_cnt))
            log_regression_loss = 0
            log_ranking_loss = 0
            log_cnt = 0
            valid_score = self.evaluate(valid_features, valid_labels,
                                        'regression')
            logging.info(f'[{niter + 1}/{num_iters}], Valid_score={valid_score}')
            if valid_score['rmse'] < best_valid_rmse:
                best_valid_rmse = valid_score['rmse']
                torch.save(self.net.state_dict(),
                           os.path.join(train_dir, 'best_model_states.th'))
                no_better = 0
            else:
                no_better += 1
            if test_df is not None:
                test_score = self.evaluate(test_features, test_labels,
                                           'regression',
                                           group_indices=test_group_indices)
                logging.info(f'[{niter + 1}/{num_iters}], Test_score={test_score}')

        niter += 1
        epoch_iter += 1
        if epoch_iter >= epoch_iters:
            epoch_iter = 0
        if niter >= num_iters:
            break
        if no_better >= stop_patience:
            logging.info('Early stop')
            break

    self.net.load_state_dict(
        torch.load(os.path.join(train_dir, 'best_model_states.th')))
def main(argv):
    # Argparse
    parser = argparse.ArgumentParser(description='PyTorch Shakespeare Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--save-directory', type=str,
                        default='output/shakespeare', help='output directory')
    parser.add_argument('--epochs', type=int, default=20, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--batch-len', type=int, default=40, metavar='N',
                        help='Batch length')
    parser.add_argument('--hidden-dim', type=int, default=500, metavar='N',
                        help='Hidden dim')
    parser.add_argument('--embedding-dim', type=int, default=350, metavar='N',
                        help='Embedding dim')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    args = parser.parse_args(argv)
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Read and process training data
    targets = np.load(
        '/home/jack/source/tinker/data/helpmylover/dataset/wiki.train.npy')
    vocabulary = np.load(
        '/home/jack/source/tinker/data/helpmylover/dataset/vocab.npy')
    targets = np.concatenate(targets).ravel()
    chars, charmap = get_charmap(vocabulary)
    charcount = len(chars)
    targets = batchify(targets, args=args)
    inputs = make_inputs(targets)
    train_dataset = TensorDataset(torch.from_numpy(inputs).long(),
                                  torch.from_numpy(targets).long())

    # Validation dataset
    targets = np.load(
        '/home/jack/source/tinker/data/helpmylover/dataset/wiki.valid.npy')
    targets = np.concatenate(targets).ravel()
    targets = batchify(targets, args=args)
    inputs = make_inputs(targets)
    valid_dataset = TensorDataset(torch.from_numpy(inputs).long(),
                                  torch.from_numpy(targets).long())

    # Train or load a model
    checkpoint_path = os.path.join(args.save_directory, 'checkpoint.pytorch')
    if not os.path.exists(checkpoint_path):
        model = TextModel(charcount=charcount, args=args)
        train_model(model=model, train_dataset=train_dataset,
                    valid_dataset=valid_dataset, args=args)
    else:
        trainer = Trainer().load(from_directory=args.save_directory)
        model = TextModel(charcount=charcount, args=args)
        model.load_state_dict(trainer.model.state_dict())
    if args.cuda:
        model = model.cuda()

    # Generate deterministic text
    print("Deterministic")
    generated = generate(model, sequence_length=1000, batch_size=2,
                         stochastic=False, args=args).data.cpu().numpy()
    print_generated(to_text(preds=generated, charset=chars))

    # Seed deterministic text
    seeds = ['KING RICHARD', 'KING RICHARD', 'Enter Falsta', 'SHAKESPEARE ']
    assert len(set(len(s) for s in seeds)) == 1
    inp = np.array([[charmap[c] for c in l] for l in seeds], dtype=np.int64)
    inp = np.pad(inp + 1, [(0, 0), (1, 0)], mode='constant')
    inp = Variable(torch.from_numpy(inp))
    if args.cuda:
        inp = inp.cuda()

    # Generate seeded deterministic text
    generated = generate(model, sequence_length=2000, batch_size=5,
                         stochastic=False, inp=inp,
                         args=args).data.cpu().numpy()
    text = to_text(preds=generated, charset=chars)
    for i, (s, t) in enumerate(zip(seeds, text)):
        print("Deterministic #{} (seed={}): {}".format(i, s, t))

    # Generate stochastic text
    print("Stochastic")
    generated = generate(model, sequence_length=1000, batch_size=5,
                         stochastic=True, args=args).data.cpu().numpy()
    print_generated(to_text(preds=generated, charset=chars))
def main(model_file):
    run_id = datetime.now().strftime('%Y%m%d_%H%M_finetune')
    log_path = os.path.join('log', run_id)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    print('The ID of this run: ' + run_id)
    print('Log directory: ' + log_path)

    # dataset
    return_all = config.finetune_T + 1
    test_sample = config.test_episode * return_all
    test_dataset = Human36m(config.test_dataset,
                            model1.input_modality,
                            return_all,
                            model1.input_size,
                            model1.input_normalize,
                            False,
                            test_sample,  # sample count
                            extend_ratio=0.1,
                            random_flip=False,
                            random_crop=False)
    test_video_count = len(test_dataset)
    print('Testing video count: %d' % test_video_count)

    # network
    G = model1.Generator()
    G = G.train()
    G = G.cuda()
    print('Generator:')
    print(str(G) + '\n')
    E = model1.Embedder()
    E = E.train()
    E = E.cuda()
    print('Embedder:')
    print(str(E) + '\n')
    D = model1.Discriminator()
    D = D.train()
    D = D.cuda()
    print('Discriminator:')
    print(str(D) + '\n')

    # loss
    L_EG = model1.Loss_EG()
    L_EG = L_EG.eval()
    L_EG = L_EG.cuda()
    print('Loss_EG:')
    print(str(L_EG) + '\n')
    L_DSC = model1.Loss_DSC()
    L_DSC = L_DSC.eval()
    L_DSC = L_DSC.cuda()
    print('Loss_DSC:')
    print(str(L_DSC) + '\n')

    # model initialization
    model1.initialize(G, E, D, L_EG, L_DSC)
    save_data = torch.load(model_file)
    _, _, _, G_state_dict, E_state_dict, D_state_dict = save_data[:6]
    E.load_state_dict(E_state_dict)

    print('EG learning rate: %.5f, D learning rate: %.5f' %
          (config.lr_EG, config.lr_D))
    for k, v in model1.loss_weight.items():
        print('Weight for %s: %.4f' % (k, v))

    for k in range(config.test_episode):
        for _video_idx, _x, _y, _x_t, _y_t in test_dataset:
            video_name = test_dataset.all_videos[_video_idx]
            print('%d: %s' % (k, video_name))
            image_path = os.path.join(log_path, '%s_%d' % (video_name, k))
            if not os.path.exists(image_path):
                os.makedirs(image_path)

            dataset = TensorDataset(_x, _y)
            dataloader = DataLoader(dataset,
                                    batch_size=config.finetune_batch_size,
                                    shuffle=config.dataset_shuffle,
                                    num_workers=0, pin_memory=True,
                                    drop_last=False)

            _x = _x.cuda()
            _y = _y.cuda()  # T * c * h * w
            _y_t = _y_t.cuda()  # c * h * w

            with torch.no_grad():
                e_hat = E(torch.unsqueeze(_x, dim=0),
                          torch.unsqueeze(_y, dim=0))
                e_hat = e_hat.view(1, -1, model1.embedding_dim)
                e_hat_mean = torch.mean(e_hat, dim=1, keepdim=False)

                G_state_dict_new = G_state_dict.copy()
                P = G_state_dict_new['P.weight']
                adain = torch.matmul(e_hat_mean, torch.transpose(P, 0, 1))
                del G_state_dict_new['P.weight']
                adain = adain.view(1, -1, 2)
                assert adain.size(1) == G.adain_param_count
                G_state_dict_new['adain'] = adain.data
                G.load_state_dict(G_state_dict_new)

                D_state_dict_new = D_state_dict.copy()
                del D_state_dict_new['embedding.weight']
                w0 = D_state_dict_new['w0']
                w = w0 + e_hat_mean
                del D_state_dict_new['w0']
                D_state_dict_new['w'] = w.data
                D.load_state_dict(D_state_dict_new)

                # initial output before finetuning
                x_hat = G(torch.unsqueeze(_y_t, dim=0))
                to_pil_image(x_hat.data.cpu(), model1.input_normalize) \
                    .save(os.path.join(image_path, 'x_hat.jpg'))

            G = G.train()
            set_grad_enabled(G, True)
            D = D.train()
            set_grad_enabled(D, True)
            optim_EG = optim.Adam(G.parameters(), lr=config.lr_EG,
                                  betas=config.adam_betas)
            optim_D = optim.Adam(D.parameters(), lr=config.lr_D,
                                 betas=config.adam_betas)

            for epoch in range(config.finetune_epoch):
                for _, (xx, yy) in enumerate(dataloader):
                    xx = xx.cuda()
                    yy = yy.cuda()

                    optim_EG.zero_grad()
                    optim_D.zero_grad()

                    # train G and D
                    g_output = G(yy)
                    d_output = D(xx, yy)
                    d_output_hat = D(g_output, yy)
                    loss_eg_all = L_EG(xx, g_output, d_output, d_output_hat)
                    loss_eg = loss_eg_all['eg_loss']
                    loss_dsc_all = L_DSC(d_output, d_output_hat)
                    loss_dsc = loss_dsc_all['dsc_loss']
                    loss = loss_eg + loss_dsc
                    loss.backward()
                    optim_EG.step()
                    optim_D.step()

                    # train D for more steps
                    for i in range(config.d_step - 1):
                        optim_D.zero_grad()
                        # do not need to train the generator
                        g_output = g_output.detach()
                        d_output = D(xx, yy)
                        d_output_hat = D(g_output, yy)
                        loss_dsc_all = L_DSC(d_output, d_output_hat)
                        loss_dsc = loss_dsc_all['dsc_loss']
                        loss_dsc.backward()
                        optim_D.step()

                with torch.no_grad():
                    x_hat_ii = G(torch.unsqueeze(_y_t, dim=0))
                    to_pil_image(x_hat_ii.data.cpu(), model1.input_normalize) \
                        .save(os.path.join(image_path, 'x_hat_%d.jpg' % epoch))

            # save images
            to_pil_image(_x_t.data.cpu(), model1.input_normalize) \
                .save(os.path.join(image_path, 'x_t.jpg'))
            to_pil_image(_y_t.data.cpu(), model1.input_normalize) \
                .save(os.path.join(image_path, 'y_t.jpg'))
            for j in range(config.finetune_T):
                to_pil_image(_x[j].data.cpu(), model1.input_normalize) \
                    .save(os.path.join(image_path, 'x_%d.jpg' % j))
print('Entity vocab size', len(entity_to_id))
print('relation vocab size', len(relation_to_id))
print('Train size', len(train_features))
print('Valid size', len(valid_features))
print('test size', len(test_features))
import sys
sys.stdout.flush()

valid_features = torch.LongTensor(valid_features)
valid_labels = torch.LongTensor(valid_labels)
test_features = torch.LongTensor(test_features)
test_labels = torch.LongTensor(test_labels)

valid_set = TensorDataset(valid_features, valid_labels)
test_set = TensorDataset(test_features, test_labels)
valid_loader = DataLoader(valid_set, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)


def get_train_loader(train_features, neg_samples=1):
    new_features = []
    new_labels = []
    for i in range(len(train_features)):
        samples = 0
        s, r, o = train_features[i]
        new_features.append([s, r, o])
        new_labels.append(1)
        while samples < neg_samples:
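            # Hedged completion sketch: get_train_loader is cut off above; a
            # standard continuation corrupts the object entity to build
            # negative triples. The uniform corruption and the 0/1 labelling
            # below are assumptions, not the original code.
            o_neg = torch.randint(len(entity_to_id), (1,)).item()
            if o_neg != o:
                new_features.append([s, r, o_neg])
                new_labels.append(0)
                samples += 1
    train_set = TensorDataset(torch.LongTensor(new_features),
                              torch.LongTensor(new_labels))
    return DataLoader(train_set, batch_size=args.batch_size, shuffle=True)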
def fit_diffeomorphism_model(self, X, t, X_d, learning_rate=1e-2,
                             learning_decay=0.95, n_epochs=50, train_frac=0.8,
                             l2=1e1, batch_size=64, initialize=True,
                             verbose=True, X_val=None, t_val=None, Xd_val=None):
    """fit_diffeomorphism_model

    Arguments:
        X {numpy array [Ntraj,Nt,Ns]} -- state
        t {numpy array [Ntraj,Nt]} -- time vector
        X_d {numpy array [Ntraj,Nt,Ns]} -- desired state

    Keyword Arguments:
        learning_rate {float} -- (default: {1e-2})
        learning_decay {float} -- (default: {0.95})
        n_epochs {int} -- (default: {50})
        train_frac {float} -- ratio of training and testing (default: {0.8})
        l2 {float} -- L2 penalty term (default: {1e1})
        batch_size {int} -- (default: {64})
        initialize {bool} -- flag to warm start (default: {True})
        verbose {bool} -- (default: {True})
        X_val {numpy array [Ntraj,Nt,Ns]} -- state in validation set (default: {None})
        t_val {numpy array [Ntraj,Nt]} -- time in validation set (default: {None})
        Xd_val {numpy array [Ntraj,Nt,Ns]} -- desired state in validation set (default: {None})

    Returns:
        float -- val_losses[-1]
    """
    device = 'cuda' if cuda.is_available() else 'cpu'
    X, X_dot, X_d, X_d_dot, t = self.process(X=X, t=t, X_d=X_d)

    # Prepare data for pytorch:
    manual_seed(42)  # Fix seed for reproducibility
    if self.traj_input:
        # [x, x_d, x_dot, x_d_dot, zeros]
        X_tensor = from_numpy(npconcatenate(
            (X, X_d, X_dot, X_d_dot, np.zeros_like(X)), axis=1))
    else:
        # [x, x_dot, zeros]
        X_tensor = from_numpy(npconcatenate(
            (X, X_dot, np.zeros_like(X)), axis=1))
    y_target = X_dot - (dot(self.A_cl, X.T) + dot(self.BK, X_d.T)).T
    y_tensor = from_numpy(y_target)
    X_tensor.requires_grad_(True)

    # Builds dataset with all data
    dataset = TensorDataset(X_tensor, y_tensor)

    if X_val is None or t_val is None or Xd_val is None:
        # Splits randomly into train and validation datasets
        n_train = int(train_frac * X.shape[0])
        n_val = X.shape[0] - n_train
        train_dataset, val_dataset = random_split(dataset, [n_train, n_val])
        # Builds a loader for each dataset to perform mini-batch gradient descent
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size)
    else:
        # Uses X,... as training data and X_val,... as validation data
        X_val, X_dot_val, Xd_val, Xd_dot_val, t_val = self.process(
            X=X_val, t=t_val, X_d=Xd_val)
        if self.traj_input:
            # [x, x_d, x_dot, x_d_dot, zeros]
            X_val_tensor = from_numpy(npconcatenate(
                (X_val, Xd_val, X_dot_val, Xd_dot_val, np.zeros_like(X_val)),
                axis=1))
        else:
            # [x, x_dot, zeros]
            X_val_tensor = from_numpy(npconcatenate(
                (X_val, X_dot_val, np.zeros_like(X_val)), axis=1))
        # Parenthesization matches the training target above
        y_target_val = X_dot_val - (dot(self.A_cl, X_val.T)
                                    + dot(self.BK, Xd_val.T)).T
        y_val_tensor = from_numpy(y_target_val)
        X_val_tensor.requires_grad_(True)
        val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
        # Builds a loader for each dataset to perform mini-batch gradient descent
        train_loader = DataLoader(dataset=dataset, batch_size=int(batch_size),
                                  shuffle=True)
        val_loader = DataLoader(dataset=val_dataset, batch_size=int(batch_size))

    # Set up optimizer and learning rate scheduler:
    optimizer = optim.Adam(self.diffeomorphism_model.parameters(),
                           lr=learning_rate, weight_decay=l2)
    lambda1 = lambda epoch: learning_decay ** epoch
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

    def make_train_step(model, loss_fn, optimizer):
        def train_step(x, y):
            model.train()  # Set model to training mode
            optimizer.zero_grad()
            y_pred = model(x)
            loss = loss_fn(y, y_pred, model.training)
            loss.backward()
            optimizer.step()
            return loss.item()
        return train_step

    batch_loss = []
    losses = []
    batch_val_loss = []
    val_losses = []
    train_step = make_train_step(self.diffeomorphism_model,
                                 self.diffeomorphism_model.diffeomorphism_loss,
                                 optimizer)

    # Initialize model weights:
    def init_normal(m):
        if type(m) == nn.Linear:
            nn.init.xavier_normal_(m.weight)

    if initialize:
        self.diffeomorphism_model.apply(init_normal)

    # Training loop
    for i in range(n_epochs):
        # Uses loader to fetch one mini-batch for training
        for x_batch, y_batch in train_loader:
            # Send mini-batch data to the same location as the model:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            # Train based on current batch:
            batch_loss.append(train_step(x_batch, y_batch))
        losses.append(sum(batch_loss) / len(batch_loss))
        batch_loss = []

        with no_grad():
            for x_val, y_val in val_loader:
                # Sends data to the same device as the model
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                self.diffeomorphism_model.eval()  # Set model to evaluation mode
                y_pred = self.diffeomorphism_model(x_val)  # Predict
                # Compute validation loss
                batch_val_loss.append(float(
                    self.diffeomorphism_model.diffeomorphism_loss(
                        y_val, y_pred, self.diffeomorphism_model.training)))
            # Save mean validation loss
            val_losses.append(sum(batch_val_loss) / len(batch_val_loss))
            batch_val_loss = []

        scheduler.step(i)
        if verbose:
            print(' - Epoch: ', i,
                  ' Training loss:', format(losses[-1], '08f'),
                  ' Validation loss:', format(val_losses[-1], '08f'))
            print('Improvement metric (for early stopping): ',
                  sum(abs(array(val_losses[-min(3, len(val_losses)):])
                          - val_losses[-1]))
                  / (3 * val_losses[-min(3, len(val_losses))]))
        # Early stopping: break once the recent validation losses plateau
        if i > n_epochs / 4 and \
                sum(abs(array(val_losses[-min(3, len(val_losses)):])
                        - val_losses[-1])) \
                / (3 * val_losses[-min(3, len(val_losses))]) < 0.01:
            break
    return val_losses[-1]
def load_data_fNRI(batch_size=1, sim_folder='', shuffle=True,
                   data_folder='data'):
    # the edges numpy arrays below are [num_sims, N, N]
    loc_train = np.load(path.join(data_folder, sim_folder, 'loc_train.npy'))
    vel_train = np.load(path.join(data_folder, sim_folder, 'vel_train.npy'))
    edges_train = np.load(path.join(data_folder, sim_folder, 'edges_train.npy'))

    loc_valid = np.load(path.join(data_folder, sim_folder, 'loc_valid.npy'))
    vel_valid = np.load(path.join(data_folder, sim_folder, 'vel_valid.npy'))
    edges_valid = np.load(path.join(data_folder, sim_folder, 'edges_valid.npy'))

    loc_test = np.load(path.join(data_folder, sim_folder, 'loc_test.npy'))
    vel_test = np.load(path.join(data_folder, sim_folder, 'vel_test.npy'))
    edges_test = np.load(path.join(data_folder, sim_folder, 'edges_test.npy'))

    # [num_samples, num_timesteps, num_dims, num_atoms]
    num_atoms = loc_train.shape[3]

    loc_max = loc_train.max()
    loc_min = loc_train.min()
    vel_max = vel_train.max()
    vel_min = vel_train.min()

    # Normalize to [-1, 1]
    loc_train = (loc_train - loc_min) * 2 / (loc_max - loc_min) - 1
    vel_train = (vel_train - vel_min) * 2 / (vel_max - vel_min) - 1
    loc_valid = (loc_valid - loc_min) * 2 / (loc_max - loc_min) - 1
    vel_valid = (vel_valid - vel_min) * 2 / (vel_max - vel_min) - 1
    loc_test = (loc_test - loc_min) * 2 / (loc_max - loc_min) - 1
    vel_test = (vel_test - vel_min) * 2 / (vel_max - vel_min) - 1

    # Reshape to: [num_sims, num_atoms, num_timesteps, num_dims]
    loc_train = np.transpose(loc_train, [0, 3, 1, 2])
    vel_train = np.transpose(vel_train, [0, 3, 1, 2])
    feat_train = np.concatenate([loc_train, vel_train], axis=3)
    loc_valid = np.transpose(loc_valid, [0, 3, 1, 2])
    vel_valid = np.transpose(vel_valid, [0, 3, 1, 2])
    feat_valid = np.concatenate([loc_valid, vel_valid], axis=3)
    loc_test = np.transpose(loc_test, [0, 3, 1, 2])
    vel_test = np.transpose(vel_test, [0, 3, 1, 2])
    feat_test = np.concatenate([loc_test, vel_test], axis=3)

    edges_train = loader_edges_encode(edges_train, num_atoms)
    edges_valid = loader_edges_encode(edges_valid, num_atoms)
    edges_test = loader_edges_encode(edges_test, num_atoms)

    edges_train = torch.LongTensor(edges_train)
    edges_valid = torch.LongTensor(edges_valid)
    edges_test = torch.LongTensor(edges_test)
    feat_train = torch.FloatTensor(feat_train)
    feat_valid = torch.FloatTensor(feat_valid)
    feat_test = torch.FloatTensor(feat_test)

    train_data = TensorDataset(feat_train, edges_train)
    valid_data = TensorDataset(feat_valid, edges_valid)
    test_data = TensorDataset(feat_test, edges_test)

    train_data_loader = DataLoader(train_data, batch_size=batch_size,
                                   shuffle=shuffle)
    valid_data_loader = DataLoader(valid_data, batch_size=batch_size)
    test_data_loader = DataLoader(test_data, batch_size=batch_size)

    return train_data_loader, valid_data_loader, test_data_loader, \
        loc_max, loc_min, vel_max, vel_min
import torch
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import BatchSampler
from torch.utils.data.dataset import TensorDataset

from mlbaselines.sampler import RandomSampler

# Build a toy dataset whose i-th element holds the value i
data = torch.ones((100, 1))
for i in range(100):
    data[i] = data[i] * i
dataset = TensorDataset(data)

# Seeded random sampling, batched in pairs; an incomplete final batch is dropped
sampler = RandomSampler(dataset, seed=1)
batch_sampler = BatchSampler(sampler, batch_size=2, drop_last=True)

# When batch_sampler is given, batching is controlled entirely by the
# sampler, so DataLoader's own batch_size must be left at its default
loader = DataLoader(dataset, batch_sampler=batch_sampler, num_workers=2)

for b in loader:
    print(b[0])
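# mlbaselines.sampler.RandomSampler is project-specific. If it is not
# available, torch's built-in RandomSampler can be seeded through a
# torch.Generator to get the same kind of reproducible shuffling. This is a
# sketch of an equivalent setup, not the original API.
import torch
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, BatchSampler

g = torch.Generator()
g.manual_seed(1)
dataset = TensorDataset(torch.arange(100, dtype=torch.float32).view(100, 1))
sampler = RandomSampler(dataset, generator=g)
loader = DataLoader(dataset,
                    batch_sampler=BatchSampler(sampler, batch_size=2, drop_last=True))
for b in loader:
    print(b[0])  # two values per batch, in a seed-reproducible order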
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import TensorDataset

# `stocks`, `test_symbols`, `prepare_stock_data`, `normalize_stock_data`, and
# `train_kwargs` are defined earlier in the original script.
train_df = stocks.drop(test_symbols, axis=0)
test_df = stocks.drop(stocks.index.difference(test_symbols), axis=0)
train_symbols = train_df.index.unique().tolist()

train_tensors = []
train_seq_lens = []
for sym in train_symbols:
    stock_data, stock_data_len = prepare_stock_data(sym)
    stock_data = normalize_stock_data(stock_data)
    train_seq_lens.append(stock_data_len)
    train_tensors.append(torch.Tensor(stock_data))

# pad_sequence stacks the 1-D series as [max_len, num_series]; transpose to
# [num_series, max_len] and add a trailing feature dimension
X = pad_sequence(train_tensors).T.unsqueeze(-1)
y = torch.Tensor(train_seq_lens)
train_dataset = TensorDataset(X, y)

test_tensors = []
test_seq_lens = []
for sym in test_symbols:
    stock_data, stock_data_len = prepare_stock_data(sym)
    stock_data = normalize_stock_data(stock_data)
    test_seq_lens.append(stock_data_len)
    test_tensors.append(torch.Tensor(stock_data))

X = pad_sequence(test_tensors).T.unsqueeze(-1)
y = torch.Tensor(test_seq_lens)
test_dataset = TensorDataset(X, y)

train_loader = torch.utils.data.DataLoader(train_dataset, **train_kwargs)
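# A quick shape check of the pad_sequence pattern above, using dummy series of
# unequal length (the values here are illustrative only).
import torch
from torch.nn.utils.rnn import pad_sequence

series = [torch.ones(5), torch.ones(3), torch.ones(7)]
padded = pad_sequence(series)   # [7, 3]: rows are timesteps, columns are series
X = padded.T.unsqueeze(-1)      # [3, 7, 1]: batch, time, features
print(padded.shape, X.shape)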
import networkx as nx
import torch
from torch.utils.data import TensorDataset, DataLoader

def load_numpy_data(args, data, adjacency):
    # Build a directed graph from the adjacency matrix
    # (in networkx >= 3.0 the equivalent call is nx.from_numpy_array)
    G = nx.from_numpy_matrix(adjacency, create_using=nx.DiGraph)
    data = torch.Tensor(data)
    # Autoencoder-style dataset: inputs and targets are the same tensor
    dataset = TensorDataset(data, data)
    data_loader = DataLoader(dataset, batch_size=args.batch_size)
    return data_loader, G
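# Minimal usage sketch for load_numpy_data. The argparse namespace and the
# random arrays below are illustrative stand-ins.
import argparse
import numpy as np
import torch

args = argparse.Namespace(batch_size=16)
data = np.random.randn(64, 8).astype(np.float32)          # 64 samples, 8 features
adjacency = (np.random.rand(8, 8) > 0.5).astype(float)    # random directed graph
loader, G = load_numpy_data(args, data, adjacency)
for x, y in loader:
    assert torch.equal(x, y)  # autoencoder targets equal inputs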
import argparse
import os
import numpy as np
import torch
from torch.autograd import Variable
from torch.utils.data import TensorDataset

def main(argv):
    # Argparse
    parser = argparse.ArgumentParser(description='PyTorch Shakespeare Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--save-directory', type=str, default='output/shakespeare',
                        help='output directory')
    parser.add_argument('--epochs', type=int, default=20, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--batch-len', type=int, default=200, metavar='N',
                        help='batch length')
    parser.add_argument('--hidden-dim', type=int, default=256, metavar='N',
                        help='hidden dim')
    parser.add_argument('--embedding-dim', type=int, default=128, metavar='N',
                        help='embedding dim')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    args = parser.parse_args(argv)
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Read and process data
    corpus = read_corpus()
    print("Corpus: {}...{}".format(corpus[:50], corpus[-50:]))
    print("Total character count: {}".format(len(corpus)))
    chars, charmap = get_charmap(corpus)
    charcount = len(chars)
    print("Unique character count: {}".format(charcount))
    array = map_corpus(corpus, charmap)
    targets = batchify(array, args=args)
    inputs = make_inputs(targets)
    print('***TARGETS***')
    print(targets)
    print('***INPUTS***')
    print(inputs)
    dataset = TensorDataset(torch.from_numpy(inputs), torch.from_numpy(targets))

    # Train a model, or load one from a previous checkpoint
    checkpoint_path = os.path.join(args.save_directory, 'checkpoint.pytorch')
    if not os.path.exists(checkpoint_path):
        model = TextModel(charcount=charcount, args=args)
        train_model(model=model, dataset=dataset, args=args)
    else:
        trainer = Trainer().load(from_directory=args.save_directory)
        model = TextModel(charcount=charcount, args=args)
        model.load_state_dict(trainer.model.state_dict())
    if args.cuda:
        model = model.cuda()

    # Generate deterministic text
    print("Deterministic")
    generated = generate(model, sequence_length=1000, batch_size=2,
                         stochastic=False, args=args).data.cpu().numpy()
    print_generated(to_text(preds=generated, charset=chars))

    # Generate deterministic text from fixed seeds (all seeds must have the same length)
    seeds = ['KING RICHARD', 'KING RICHARD', 'Enter Falsta', 'SHAKESPEARE ']
    assert len(set(len(s) for s in seeds)) == 1
    inp = np.array([[charmap[c] for c in l] for l in seeds], dtype=np.int64)
    inp = np.pad(inp + 1, [(0, 0), (1, 0)], mode='constant')  # shift ids, prepend start token
    inp = Variable(torch.from_numpy(inp))
    if args.cuda:
        inp = inp.cuda()
    generated = generate(model, sequence_length=2000, batch_size=5,
                         stochastic=False, inp=inp, args=args).data.cpu().numpy()
    text = to_text(preds=generated, charset=chars)
    for i, (s, t) in enumerate(zip(seeds, text)):
        print("Deterministic #{} (seed={}): {}".format(i, s, t))

    # Generate stochastic text
    print("Stochastic")
    generated = generate(model, sequence_length=1000, batch_size=5,
                         stochastic=True, args=args).data.cpu().numpy()
    print_generated(to_text(preds=generated, charset=chars))
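# Standard entry point; when the script is run directly, argv excludes the
# program name.
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])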
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

def _normalize_split(loc, paths):
    # Scale the x/y channels of each sample to [-1, 1] using that sample's
    # own extrema. Channels 0/2 hold x coordinates, channels 1/3 hold y
    # coordinates, and channel 4 is left unchanged. `val` is a view into
    # `loc`, so the in-place assignments below normalize `loc` directly.
    max_min = []
    maxmin = np.zeros((loc.shape[0], 4, loc.shape[2]))
    for inc, val in enumerate(loc):
        loc_max_x = np.max((val[0].max(), val[2].max()))
        loc_min_x = np.min((val[0].min(), val[2].min()))
        loc_max_y = np.max((val[1].max(), val[3].max()))
        loc_min_y = np.min((val[1].min(), val[3].min()))
        val[0] = (val[0] - loc_min_x) * 2 / (loc_max_x - loc_min_x) - 1
        val[1] = (val[1] - loc_min_y) * 2 / (loc_max_y - loc_min_y) - 1
        val[2] = (val[2] - loc_min_x) * 2 / (loc_max_x - loc_min_x) - 1
        val[3] = (val[3] - loc_min_y) * 2 / (loc_max_y - loc_min_y) - 1
        # Broadcast each sample's extrema over its keypoints so they can be
        # carried along as extra feature channels
        maxmin[inc][0] = np.ones(val[0].shape[0]) * loc_max_x
        maxmin[inc][1] = np.ones(val[0].shape[0]) * loc_min_x
        maxmin[inc][2] = np.ones(val[0].shape[0]) * loc_max_y
        maxmin[inc][3] = np.ones(val[0].shape[0]) * loc_min_y
        max_min.append((loc_max_x, loc_min_x, loc_max_y, loc_min_y, paths[inc]))
    return max_min, maxmin

def load_data_vis(batch_size=1, suffix=''):
    loc_train = np.load('data/loc_train' + suffix + '.npy')
    edges_train = np.load('data/edges_train' + suffix + '.npy')
    path_train = np.load('data/path_train' + suffix + '.npy')
    type_train = np.load('data/type_train' + suffix + '.npy')

    loc_valid = np.load('data/loc_valid' + suffix + '.npy')
    edges_valid = np.load('data/edges_valid' + suffix + '.npy')
    path_valid = np.load('data/path_valid' + suffix + '.npy')
    type_valid = np.load('data/type_valid' + suffix + '.npy')

    loc_test = np.load('data/loc_test' + suffix + '.npy')
    edges_test = np.load('data/edges_test' + suffix + '.npy')
    path_test = np.load('data/path_test' + suffix + '.npy')
    type_test = np.load('data/type_test' + suffix + '.npy')

    # loc arrays: [num_samples, num_channels, num_keypoints]
    num_kps = loc_train.shape[2]

    # Normalize each sample to [-1, 1] using its own extrema
    max_min_train, maxmin_train = _normalize_split(loc_train, path_train)
    max_min_valid, maxmin_valid = _normalize_split(loc_valid, path_valid)
    max_min_test, maxmin_test = _normalize_split(loc_test, path_test)

    # Reshape to [num_samples, num_keypoints, num_channels], append the type
    # and extrema channels, and shift edge labels to be non-negative
    loc_train = np.transpose(loc_train, [0, 2, 1])
    maxmin_train = np.transpose(maxmin_train, [0, 2, 1])
    feat_train = np.concatenate((loc_train, type_train), axis=2)
    feat_train = np.concatenate((feat_train, maxmin_train), axis=2)
    edges_train = np.reshape(edges_train, [-1, num_kps ** 2])
    edges_train = np.array(edges_train + 1, dtype=np.int64)

    loc_valid = np.transpose(loc_valid, [0, 2, 1])
    maxmin_valid = np.transpose(maxmin_valid, [0, 2, 1])
    feat_valid = np.concatenate((loc_valid, type_valid), axis=2)
    feat_valid = np.concatenate((feat_valid, maxmin_valid), axis=2)
    edges_valid = np.reshape(edges_valid, [-1, num_kps ** 2])
    edges_valid = np.array(edges_valid + 1, dtype=np.int64)

    loc_test = np.transpose(loc_test, [0, 2, 1])
    maxmin_test = np.transpose(maxmin_test, [0, 2, 1])
    feat_test = np.concatenate((loc_test, type_test), axis=2)
    feat_test = np.concatenate((feat_test, maxmin_test), axis=2)
    edges_test = np.reshape(edges_test, [-1, num_kps ** 2])
    edges_test = np.array(edges_test + 1, dtype=np.int64)

    feat_train = torch.FloatTensor(feat_train)
    edges_train = torch.LongTensor(edges_train)
    feat_valid = torch.FloatTensor(feat_valid)
    edges_valid = torch.LongTensor(edges_valid)
    feat_test = torch.FloatTensor(feat_test)
    edges_test = torch.LongTensor(edges_test)

    # Exclude self edges
    off_diag_idx = np.ravel_multi_index(
        np.where(np.ones((num_kps, num_kps)) - np.eye(num_kps)),
        [num_kps, num_kps])
    edges_train = edges_train[:, off_diag_idx]
    edges_valid = edges_valid[:, off_diag_idx]
    edges_test = edges_test[:, off_diag_idx]

    train_data = TensorDataset(feat_train, edges_train)
    valid_data = TensorDataset(feat_valid, edges_valid)
    test_data = TensorDataset(feat_test, edges_test)

    train_data_loader = DataLoader(train_data, batch_size=batch_size)
    valid_data_loader = DataLoader(valid_data, batch_size=batch_size)
    test_data_loader = DataLoader(test_data, batch_size=batch_size)

    return (train_data_loader, path_train, max_min_train,
            valid_data_loader, path_valid, max_min_valid,
            test_data_loader, path_test, max_min_test)
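# Minimal usage sketch for load_data_vis; the suffix is illustrative, and the
# .npy files are expected under data/ as in the loader above.
(train_loader, path_train, max_min_train,
 valid_loader, path_valid, max_min_valid,
 test_loader, path_test, max_min_test) = load_data_vis(batch_size=8, suffix='_vis')

for feats, edges in train_loader:
    # feats: [batch, num_keypoints, channels], where channels is the 5 location
    # channels plus the type and extrema channels appended above;
    # edges: off-diagonal (non-self) entries only
    print(feats.shape, edges.shape)
    break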