def train(model, iterator, optimizer, loss_type='order'):
    """Run the training loop on a training dataset.

    Returns a 4-tuple of per-epoch averages:
    (loss, order loss, mean of predictions, std of predictions).
    """
    running_loss = 0.0
    running_order_loss = 0.0
    running_mean = 0.0
    running_std = 0.0

    model.train()
    for batch in iterator:
        # Move inputs to the target device; non_blocking pairs with
        # pinned-memory loaders for async host-to-device copies.
        images = batch[0].to(device, non_blocking=True)
        ranks = batch[1].to(device, non_blocking=True)

        predictions, log_vars = model(images)
        loss, order_loss = pairwise_loss(predictions, log_vars, ranks, loss_type)

        loss.backward()
        optimizer.step()
        # Clear gradients after stepping so the next backward starts fresh.
        model.zero_grad()

        # Accumulate scalar diagnostics for this batch.
        running_mean += torch.mean(predictions).cpu().item()
        running_std += torch.std(predictions).item()
        running_order_loss += order_loss.item()
        running_loss += loss.item()

    n_batches = len(iterator)
    return (running_loss / n_batches,
            running_order_loss / n_batches,
            running_mean / n_batches,
            running_std / n_batches)
def main():
    """Train a pairwise ranking net and report swapped pairs on validation.

    Each batch samples `n_sampling_combs` random index pairs, labels them
    S_ij in {-1, 0, 1} by comparing targets, and accumulates the pairwise
    loss before a single backward/step.
    """
    options = parse_args()
    N_train = options.n_train
    N_valid = options.n_valid
    D_in = options.n_input
    D_out = options.n_output
    epochs = options.n_epoch
    batch_size = options.n_batch
    n_sampling_combs = options.n_sampling

    X_data, X_valid, y_data, y_valid = make_dataset(N_train, N_valid, D_in)
    net = Net(D_in, D_out)
    opt = optim.Adam(net.parameters())

    for epoch in range(1, epochs + 1):
        # Reshuffle the training set each epoch.
        index = torch.randperm(N_train)
        X_train = X_data[index]
        y_train = y_data[index]

        for cur_batch in range(0, N_train, batch_size):
            X_batch = X_train[cur_batch:cur_batch + batch_size]
            y_batch = y_train[cur_batch:cur_batch + batch_size]
            opt.zero_grad()
            batch_loss = torch.zeros(1)
            # BUG FIX: tensor slicing never yields None, so the previous
            # `X_batch is not None` guard was always true; check emptiness.
            if len(X_batch) > 0:
                preds = net(X_batch)
                for _ in range(n_sampling_combs):
                    # NOTE(review): sampling with replacement, so i == j is
                    # possible (a degenerate S_ij == 0 pair) — confirm intent.
                    i, j = np.random.choice(range(preds.shape[0]), 2)
                    s_i = preds[i]
                    s_j = preds[j]
                    if y_batch[i] > y_batch[j]:
                        S_ij = 1
                    elif y_batch[i] == y_batch[j]:
                        S_ij = 0
                    else:
                        S_ij = -1
                    loss = pairwise_loss(s_i, s_j, S_ij)
                    batch_loss += loss
                # BUG FIX: backward() runs exactly once per graph here, so
                # retain_graph=True only wasted memory.
                batch_loss.backward()
                opt.step()

        # Evaluate ranking quality on the validation set.
        with torch.no_grad():
            valid_preds = net(X_valid)
            valid_swapped_pairs = swapped_pairs(valid_preds, y_valid)
            print(
                f"epoch: {epoch} valid swapped pairs: {valid_swapped_pairs}/{N_valid*(N_valid-1)//2}"
            )
    print('DONE')
def train(args):
    """Train a deep-hashing network and log progress to TensorBoard.

    Optimizes a similarity (pairwise) loss plus 0.01x quantization loss,
    snapshots scalars every iteration, and saves the whole model at the end.
    """
    ## tensorboardX
    tflog_path = osp.join(args.output_path, "tflog")
    if os.path.exists(tflog_path):
        shutil.rmtree(tflog_path)
    writer = SummaryWriter(logdir=tflog_path)

    ## prepare data
    train_transform = prep.image_train(resize_size=256, crop_size=224)
    train_set = ImageDataset(args.train_path, transform=train_transform)
    train_loader = util_data.DataLoader(train_set, batch_size=args.batch_size,
                                        shuffle=True, num_workers=4)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    ## set base network
    model = load_model(args.net, args.bit)
    writer.add_graph(model, input_to_model=(torch.rand(2, 3, 224, 224),))
    model.to(device)
    if device == 'cuda':
        cudnn.benchmark = True
    model.train()

    ## set optimizer and scheduler (hash layer learns 10x faster than backbone)
    parameter_list = [{"params": model.feature_layers.parameters(), "lr": args.lr},
                      {"params": model.hash_layer.parameters(), "lr": args.lr * 10}]
    optimizer = optim.SGD(parameter_list, lr=args.lr, momentum=0.9,
                          weight_decay=0.005, nesterov=True)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3000, gamma=0.5)

    ## train
    # BUG FIX: guard against single-batch loaders — `len(train_loader) - 1`
    # would be 0 and crash the modulo below.
    steps_per_epoch = max(len(train_loader) - 1, 1)
    for i in range(args.num_iter):
        optimizer.zero_grad()
        # Re-create the loader iterator once it is exhausted.
        if i % steps_per_epoch == 0:
            train_iter = iter(train_loader)
        # BUG FIX: Python 3 iterators have no .next() method; use next().
        inputs, labels = next(train_iter)
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        s_loss = loss.pairwise_loss(outputs, labels, alpha=args.alpha,
                                    class_num=args.class_num)
        q_loss = loss.quantization_loss(outputs)
        total_loss = s_loss + 0.01 * q_loss
        total_loss.backward()
        optimizer.step()
        # BUG FIX: since PyTorch 1.1, scheduler.step() must come AFTER
        # optimizer.step(), otherwise the first LR value is skipped.
        scheduler.step()

        writer.add_scalar('similarity loss', s_loss, i)
        writer.add_scalar('quantization loss', q_loss, i)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], i)
        if i % 10 == 0:
            print("{} #train# Iter: {:05d}, loss: {:.3f} quantizaion loss: {:.3f}".format(
                datetime.now(), i, s_loss.item(), q_loss.item()))

    writer.close()
    torch.save(model, osp.join(args.output_path, "model.pth"))
def train(config):
    """Train a pairwise deep-hashing network as described by `config`.

    Two loaders over the same training list feed image pairs; the network
    hashes both halves of the concatenated batch and a pairwise loss
    (gamma / normed / q_lambda variant) drives the update.
    """
    ## set pre-process
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["train_set1"] = prep.image_train(
        resize_size=prep_config["resize_size"],
        crop_size=prep_config["crop_size"])
    prep_dict["train_set2"] = prep.image_train(
        resize_size=prep_config["resize_size"],
        crop_size=prep_config["crop_size"])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    dsets["train_set1"] = ImageList(open(data_config["train_set1"]["list_path"]).readlines(),
                                    transform=prep_dict["train_set1"])
    dset_loaders["train_set1"] = util_data.DataLoader(dsets["train_set1"],
                                                      batch_size=data_config["train_set1"]["batch_size"],
                                                      shuffle=True, num_workers=4)
    dsets["train_set2"] = ImageList(open(data_config["train_set2"]["list_path"]).readlines(),
                                    transform=prep_dict["train_set2"])
    dset_loaders["train_set2"] = util_data.DataLoader(dsets["train_set2"],
                                                      batch_size=data_config["train_set2"]["batch_size"],
                                                      shuffle=True, num_workers=4)
    hash_bit = config["hash_bit"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["type"](**net_config["params"])
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        base_network = base_network.cuda()

    ## collect parameters: hash layer trains 10x faster than the backbone
    parameter_list = [{"params": base_network.feature_layers.parameters(), "lr": 1},
                      {"params": base_network.hash_layer.parameters(), "lr": 10}]

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optim_dict[optimizer_config["type"]](parameter_list,
                                                     **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    ## train
    len_train1 = len(dset_loaders["train_set1"]) - 1
    len_train2 = len(dset_loaders["train_set2"]) - 1
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    for i in range(config["num_iterations"]):
        # Periodically snapshot the model.
        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network), osp.join(config["output_path"],
                       "iter_{:05d}_model.pth.tar".format(i)))

        ## train one iter
        base_network.train(True)
        optimizer = lr_scheduler(param_lr, optimizer, i, **schedule_param)
        optimizer.zero_grad()
        # Re-create loader iterators before they are exhausted.
        if i % len_train1 == 0:
            iter1 = iter(dset_loaders["train_set1"])
        if i % len_train2 == 0:
            iter2 = iter(dset_loaders["train_set2"])
        # BUG FIX: Python 3 iterators have no .next() method; use next().
        inputs1, labels1 = next(iter1)
        inputs2, labels2 = next(iter2)
        if use_gpu:
            inputs1, inputs2, labels1, labels2 = \
                Variable(inputs1).cuda(), Variable(inputs2).cuda(), \
                Variable(labels1).cuda(), Variable(labels2).cuda()
        else:
            inputs1, inputs2, labels1, labels2 = Variable(inputs1), \
                Variable(inputs2), Variable(labels1), Variable(labels2)

        # Hash both batches in one forward pass, then split the outputs.
        inputs = torch.cat((inputs1, inputs2), dim=0)
        outputs = base_network(inputs)
        similarity_loss = loss.pairwise_loss(outputs.narrow(0, 0, inputs1.size(0)),
                                             outputs.narrow(0, inputs1.size(0), inputs2.size(0)),
                                             labels1, labels2,
                                             hashbit=hash_bit,
                                             gamma=config["loss"]["gamma"],
                                             normed=config["loss"]["normed"],
                                             q_lambda=config["loss"]["q_lambda"])
        # BUG FIX: `.data[0]` raises on 0-dim tensors since PyTorch 0.4;
        # use .item() to extract the scalar.
        total_loss_value = total_loss_value + similarity_loss.float().item()
        similarity_loss.backward()
        if (i + 1) % len_train1 == 0:
            print("Epoch: {:05d}, loss: {:.3f}".format(i // len_train1, total_loss_value))
            total_loss_value = 0.0
        optimizer.step()
# One training step: run a concatenated pair of mini-batches through the
# model and optimize center loss + classification loss. The pairwise
# similarity loss is computed but deliberately left out of the objective
# (see the commented total_loss line below).
inputs = torch.cat((input_x_one, input_x_two), dim=0)
center_features, codes, outputs = model(inputs)
# BUG FIX: `classes` is a keyword-only argument of label_binarize in
# modern scikit-learn; passing it positionally raises a TypeError.
input_y_one_bi = torch.tensor(
    label_binarize(input_y_one, classes=np.arange(n_classes))).cuda()
input_y_two_bi = torch.tensor(
    label_binarize(input_y_two, classes=np.arange(n_classes))).cuda()
# Pairwise hash-similarity loss over the two halves of the batch.
similarity_loss = loss.pairwise_loss(codes.narrow(0, 0, input_x_one.size(0)),
                                     codes.narrow(0, input_x_one.size(0), input_x_two.size(0)),
                                     input_y_one_bi, input_y_two_bi,
                                     sigmoid_param=sigmoid_param,
                                     l_threshold=l_threshold,
                                     class_num=class_num)
center_loss = center_loss_func(
    center_features, torch.cat((input_y_one, input_y_two), dim=0))
# Classification loss is computed separately for each half.
classify_loss_one = loss_func(
    outputs.narrow(0, 0, input_x_one.size(0)), input_y_one)
classify_loss_two = loss_func(
    outputs.narrow(0, input_x_one.size(0), input_x_two.size(0)), input_y_two)
classify_loss = classify_loss_one + classify_loss_two
#total_loss = similarity_loss + classify_loss + center_loss
total_loss = classify_loss + center_loss
optimizer.zero_grad()
total_loss.backward()
optimizer.step()
def train(config):
    """Train a pairwise deep-hashing network as described by `config`.

    Two loaders over the same training list feed image pairs; the network
    hashes both halves of the concatenated batch and a pairwise loss
    (sigmoid_param / l_threshold variant) drives the update.
    """
    ## set pre-process
    prep_dict = {}
    prep_config = config["prep"]
    prep_dict["train_set1"] = prep.image_train(
        resize_size=prep_config["resize_size"],
        crop_size=prep_config["crop_size"])
    prep_dict["train_set2"] = prep.image_train(
        resize_size=prep_config["resize_size"],
        crop_size=prep_config["crop_size"])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    dsets["train_set1"] = ImageList(open(data_config["train_set1"]["list_path"]).readlines(),
                                    transform=prep_dict["train_set1"])
    dset_loaders["train_set1"] = util_data.DataLoader(dsets["train_set1"],
                                                      batch_size=data_config["train_set1"]["batch_size"],
                                                      shuffle=True, num_workers=4)
    dsets["train_set2"] = ImageList(open(data_config["train_set2"]["list_path"]).readlines(),
                                    transform=prep_dict["train_set2"])
    dset_loaders["train_set2"] = util_data.DataLoader(dsets["train_set2"],
                                                      batch_size=data_config["train_set2"]["batch_size"],
                                                      shuffle=True, num_workers=4)
    hash_bit = config["hash_bit"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["type"](**net_config["params"])
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        base_network = base_network.cuda()

    ## collect parameters: hash layer trains 10x faster than the backbone
    parameter_list = [{"params": base_network.feature_layers.parameters(), "lr": 1},
                      {"params": base_network.hash_layer.parameters(), "lr": 10}]

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optim_dict[optimizer_config["type"]](parameter_list,
                                                     **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    ## train
    len_train1 = len(dset_loaders["train_set1"]) - 1
    len_train2 = len(dset_loaders["train_set2"]) - 1
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    for i in range(config["num_iterations"]):
        # Periodically snapshot the model.
        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network), osp.join(config["output_path"],
                       "iter_{:05d}_model.pth.tar".format(i)))

        ## train one iter
        base_network.train(True)
        optimizer = lr_scheduler(param_lr, optimizer, i, **schedule_param)
        optimizer.zero_grad()
        # Re-create loader iterators before they are exhausted.
        if i % len_train1 == 0:
            iter1 = iter(dset_loaders["train_set1"])
        if i % len_train2 == 0:
            iter2 = iter(dset_loaders["train_set2"])
        # BUG FIX: Python 3 iterators have no .next() method; use next().
        inputs1, labels1 = next(iter1)  # fetch train_set1 images and their labels
        inputs2, labels2 = next(iter2)
        # inputs1 shape: [batch, 3, 224, 224] (e.g. batch_size 36, 224x224 RGB)
        # cv2.imshow(torchvision.utils.make_grid(inputs1))  # optional batch preview
        if use_gpu:
            inputs1, inputs2, labels1, labels2 = \
                Variable(inputs1).cuda(), Variable(inputs2).cuda(), \
                Variable(labels1).cuda(), Variable(labels2).cuda()
        else:
            inputs1, inputs2, labels1, labels2 = Variable(inputs1), \
                Variable(inputs2), Variable(labels1), Variable(labels2)

        # Hash both batches in one forward pass
        # (inputs: [2*batch, 3, 224, 224], outputs: [2*batch, hash_bit]),
        # then split the outputs with narrow() and compute the pairwise loss.
        inputs = torch.cat((inputs1, inputs2), dim=0)
        outputs = base_network(inputs)
        similarity_loss = loss.pairwise_loss(outputs.narrow(0, 0, inputs1.size(0)),
                                             outputs.narrow(0, inputs1.size(0), inputs2.size(0)),
                                             labels1, labels2,
                                             sigmoid_param=config["loss"]["sigmoid_param"],
                                             l_threshold=config["loss"]["l_threshold"],
                                             class_num=config["loss"]["class_num"])
        similarity_loss.backward()  # backpropagate
        print("Iter: {:05d}, loss: {:.3f}".format(
            i, similarity_loss.float().item()))
        config["out_file"].write("Iter: {:05d}, loss: {:.3f} \n".format(i,
            similarity_loss.float().item()))
        optimizer.step()
def train(config):
    """Train a deep-hashing network on one of several benchmark datasets.

    Supports cifar10 / mnist / imagenet / fashion_mnist; MNIST-style label
    tensors are converted to one-hot before the pairwise loss.
    """
    ## set pre-process
    prep_dict = {}
    job_dataset = config["dataset"]
    prep_dict["train_set1"] = trans_train(job_dataset=job_dataset)
    prep_dict["train_set2"] = trans_train(job_dataset=job_dataset)

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    if config["dataset"] == 'cifar10':
        dsets['train_set1'] = ImageList(open(data_config["train_set1"]["list_path"]).readlines(),
                                        transform=prep_dict["train_set1"])
        dsets['train_set2'] = ImageList(open(data_config["train_set2"]["list_path"]).readlines(),
                                        transform=prep_dict["train_set2"])
    if config["dataset"] == 'mnist':
        root = '../../../../data/mnist'
        if not os.path.exists(root):
            os.mkdir(root)
        dsets["train_set1"] = dset.MNIST(root=root, train=True, transform=prep_dict["train_set1"], download=True)
        dsets["train_set2"] = dset.MNIST(root=root, train=True, transform=prep_dict["train_set2"], download=True)
    if config["dataset"] == 'imagenet':
        dsets['train_set1'] = ImageList(open(data_config["train_set1"]["list_path"]).readlines(),
                                        transform=prep_dict["train_set1"])
        dsets['train_set2'] = ImageList(open(data_config["train_set2"]["list_path"]).readlines(),
                                        transform=prep_dict["train_set2"])
    if config["dataset"] == 'fashion_mnist':
        root = '../../../../data/fashion_mnist'
        if not os.path.exists(root):
            os.mkdir(root)
        dsets["train_set1"] = dset.FashionMNIST(root=root, train=True, transform=prep_dict["train_set1"], download=True)
        dsets["train_set2"] = dset.FashionMNIST(root=root, train=True, transform=prep_dict["train_set2"], download=True)

    # "shuffle=True" is the (only) source of randomness in the training process.
    dset_loaders["train_set1"] = util_data.DataLoader(dsets["train_set1"],
                                                      batch_size=data_config["train_set1"]["batch_size"],
                                                      shuffle=True, num_workers=4)
    dset_loaders["train_set2"] = util_data.DataLoader(dsets["train_set2"],
                                                      batch_size=data_config["train_set2"]["batch_size"],
                                                      shuffle=True, num_workers=4)

    ## set base network
    net_config = config["network"]
    base_network = net_config["type"](**net_config["params"])
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        base_network = base_network.cuda()

    ## collect parameters: hash layer trains 10x faster than the backbone
    parameter_list = [{"params": base_network.feature_layers.parameters(), "lr": 1},
                      {"params": base_network.hash_layer.parameters(), "lr": 10}]

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optim_dict[optimizer_config["type"]](parameter_list,
                                                     **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    ## train
    len_train1 = len(dset_loaders["train_set1"])
    len_train2 = len(dset_loaders["train_set2"])
    for i in range(config["num_iterations"]):
        # Periodically snapshot the model weights (state_dict only).
        if i % config["snapshot_interval"] == 0:
            torch.save(base_network.state_dict(), osp.join(config["output_path"],
                       "iter_{:05d}_model_dict.pth.tar".format(i)))

        ## train one iter
        base_network.train(True)
        optimizer = lr_scheduler(param_lr, optimizer, i, **schedule_param)
        optimizer.zero_grad()
        # Re-create loader iterators before they are exhausted.
        if i % len_train1 == 0:
            iter1 = iter(dset_loaders["train_set1"])
        if i % len_train2 == 0:
            iter2 = iter(dset_loaders["train_set2"])
        # BUG FIX: Python 3 iterators have no .next() method; use next().
        inputs1, labels1 = next(iter1)
        inputs2, labels2 = next(iter2)
        if job_dataset == 'mnist' or job_dataset == 'fashion_mnist':
            # MNIST-style datasets yield integer labels; convert to one-hot.
            labels1, labels2 = make_one_hot(labels1), make_one_hot(labels2)
        if use_gpu:
            inputs1, inputs2, labels1, labels2 = \
                Variable(inputs1).cuda(), Variable(inputs2).cuda(), \
                Variable(labels1).cuda(), Variable(labels2).cuda()
        else:
            inputs1, inputs2, labels1, labels2 = Variable(inputs1), \
                Variable(inputs2), Variable(labels1), Variable(labels2)

        # Hash both batches in one forward pass, then split the outputs.
        inputs = torch.cat((inputs1, inputs2), dim=0)
        outputs = base_network(inputs)
        similarity_loss = loss.pairwise_loss(outputs.narrow(0, 0, inputs1.size(0)),
                                             outputs.narrow(0, inputs1.size(0), inputs2.size(0)),
                                             labels1, labels2,
                                             sigmoid_param=config["loss"]["sigmoid_param"],
                                             l_threshold=config["loss"]["l_threshold"],
                                             class_num=config["loss"]["class_num"])
        similarity_loss.backward()
        print(("Iter: {:05d}, loss: {:.3f}".format(i, similarity_loss.item())))
        config["out_file"].write("Iter: {:05d}, loss: {:.3f}\n".format(i, similarity_loss.item()))
        optimizer.step()
if config['training']['mode'] == 'pair': node_features, edge_features, from_idx, to_idx, graph_idx, labels = get_graph( batch) labels = labels.to(device) else: node_features, edge_features, from_idx, to_idx, graph_idx = get_graph( batch) graph_vectors = model(node_features.to(device), edge_features.to(device), from_idx.to(device), to_idx.to(device), graph_idx.to(device), training_n_graphs_in_batch) if config['training']['mode'] == 'pair': x, y = reshape_and_split_tensor(graph_vectors, 2) loss = pairwise_loss(x, y, labels, loss_type=config['training']['loss'], margin=config['training']['margin']) is_pos = (labels == torch.ones(labels.shape).long().to(device)).float() is_neg = 1 - is_pos n_pos = torch.sum(is_pos) n_neg = torch.sum(is_neg) sim = compute_similarity(config, x, y) sim_pos = torch.sum(sim * is_pos) / (n_pos + 1e-8) sim_neg = torch.sum(sim * is_neg) / (n_neg + 1e-8) else: x_1, y, x_2, z = reshape_and_split_tensor(graph_vectors, 4) loss = triplet_loss(x_1, y, x_2,