def train_encoder():
    import os

    # Load CIFAR-10 and use the images themselves as targets (autoencoder training)
    cLoader = CifarLoader(CIFAR_DIR=os.path.join("../", "cifar-10-batches-py", ""),
                          validation_partition=1.0)
    train_set = cLoader.get_training_dataset()
    train_set.labels = train_set.data
    train_set.labels = train_set.labels.transpose(0, 3, 1, 2)  # NHWC -> NCHW

    train_opts = trn.TrainingOptions()
    train_opts.optimizer_type = trn.OptimizerType.Adam
    train_opts.learning_rate = 0.001
    train_opts.learning_rate_update_by_step = False  # Update scheduler at epochs
    train_opts.learning_rate_drop_factor = 0.5
    train_opts.learning_rate_drop_type = trn.SchedulerType.StepLr
    train_opts.learning_rate_drop_step_count = 2
    train_opts.batch_size = 64
    train_opts.weight_decay = 1e-5
    train_opts.n_epochs = 6
    train_opts.use_gpu = True
    train_opts.save_model = True
    train_opts.saved_model_name = "models/encoders/cifar_encoder"
    train_opts.regularization_method = None

    net = CifarEncoder(100)
    loss_fnc = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), weight_decay=1e-5)

    num_epochs = 8
    batch_size = 100
    num_iters = int(train_set.get_element_count() / batch_size)
    for e in range(num_epochs):
        train_set.shuffle_data()
        for iter in range(num_iters):
            batch_data, batch_labels = train_set.get_batch(batch_size)
            out = net(batch_data)
            # Reconstruction loss: the output is compared against the input images
            loss = loss_fnc(out, batch_data)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if iter % 200 == 0:
                print('epoch [{}/{}], iter [{}/{}], loss:{:.4f}'.format(
                    e + 1, num_epochs, iter + 1, num_iters, loss.item()))
    torch.save(net.state_dict(), "cifar_encoder")
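# CifarEncoder is defined elsewhere in the repository and is not shown here. Purely as an
# illustration (an assumption, not the repository's actual architecture), the sketch below
# shows the kind of module the loop above expects: forward() must map a batch of 3x32x32
# images to a reconstruction of the same shape via an n_dims-dimensional latent code.
import torch
import torch.nn as nn


class ToyCifarAutoencoder(nn.Module):  # hypothetical name, for illustration only
    def __init__(self, n_dims=100):
        super().__init__()
        # Encoder: 3x32x32 image -> n_dims latent vector
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=2, padding=1),   # -> 16x16x16
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1),  # -> 32x8x8
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(32 * 8 * 8, n_dims),
        )
        # Decoder: n_dims latent vector -> 3x32x32 reconstruction
        self.decoder = nn.Sequential(
            nn.Linear(n_dims, 32 * 8 * 8),
            nn.Unflatten(1, (32, 8, 8)),
            nn.ConvTranspose2d(32, 16, 2, stride=2),     # -> 16x16x16
            nn.ReLU(),
            nn.ConvTranspose2d(16, 3, 2, stride=2),      # -> 3x32x32
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))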
def train_student_encoded(train_set, val_set, teacher, params):
    EXP_NO = params.exp_no
    EXP_ID = params.exp_id
    STUDENT_TEMP = params.student_temp
    TEACHER_TEMP = params.teacher_temp
    ALPHA = params.alpha
    N_RULES = params.n_rules

    # Create the experiment output directory models/<exp_id>/<exp_no>
    ROOT = "./models/{}".format(EXP_ID)
    if not os.path.exists(ROOT):
        os.mkdir(ROOT)
    ROOT = "./models/{}/{}".format(EXP_ID, EXP_NO)
    if not os.path.exists(ROOT):
        os.mkdir(ROOT)

    # Save params
    with open(ROOT + "/params", "w") as f:
        json.dump(vars(args), f)

    STUDENT_MODEL_PATH = ROOT + "/student"

    train_opts = trn.TrainingOptions()
    train_opts.optimizer_type = trn.OptimizerType.Adam
    train_opts.learning_rate = 0.01
    train_opts.learning_rate_drop_type = trn.SchedulerType.StepLr
    train_opts.learning_rate_update_by_step = False  # Update at every epoch
    train_opts.learning_rate_drop_factor = 0.5  # Halve the learning rate
    train_opts.learning_rate_drop_step_count = params.learn_drop_epochs
    train_opts.batch_size = 128
    train_opts.n_epochs = params.n_epochs
    train_opts.use_gpu = True
    train_opts.custom_validation_func = validate_distillation
    train_opts.save_model = False
    train_opts.verbose_freq = 100
    train_opts.weight_decay = 1e-8
    train_opts.shuffle_data = True
    train_opts.regularization_method = None

    # Define loss
    dist_loss = DistillationLoss(STUDENT_TEMP, TEACHER_TEMP, ALPHA)

    # Load the pretrained encoder used as the student's feature extractor
    encoder = CifarEncoder(n_dims=params.n_inputs)
    encoder.load_state_dict(torch.load("Networks/cifar_encoder"))

    student = StudentEncoder(n_memberships=N_RULES,
                             n_inputs=params.n_inputs,
                             n_outputs=10,
                             learnable_memberships=params.learn_ants,
                             encoder=encoder,
                             fuzzy_type=params.fuzzy_type,
                             use_sigma_scale=params.use_sigma_scale,
                             use_height_scale=params.use_height_scale)

    # Initialize student
    print("Initializing Student")
    train_set.shuffle_data()
    init_data, init_labels = train_set.get_batch(60000, 0, "cpu")
    student.initialize(init_data)
    # student.load_state_dict(torch.load(STUDENT_MODEL_PATH))
    print("Done Initializing Student")
    # student.fuzzy_layer.draw(5)
    # plt.plot(student.feature_extraction(init_data)[:, 1:2], np.zeros(init_data.shape[0]), 'o')
    # plt.show()

    device = "cuda:" + args.gpu_no
    student.to(device)

    # Define distillation network wrapping the student and teacher, then train
    dist_net = DistillNet(student, teacher)
    trainer = trn.Trainer(dist_net, train_opts)
    results = trainer.train(dist_loss, train_set, val_set, is_classification=True)

    torch.save(student.state_dict(), STUDENT_MODEL_PATH)
    trn.save_train_info(results, STUDENT_MODEL_PATH + "_train_info")
    return student
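# DistillationLoss is defined elsewhere in the repository; its exact form is not shown here.
# The sketch below is an assumption illustrating one standard way such a loss is usually
# built (Hinton-style knowledge distillation): a KL term between temperature-softened
# student and teacher logits, blended with an ordinary cross-entropy term via alpha.
# KDLossSketch is a hypothetical name, not the repository's DistillationLoss.
import torch
import torch.nn as nn
import torch.nn.functional as F


class KDLossSketch(nn.Module):
    def __init__(self, student_temp, teacher_temp, alpha):
        super().__init__()
        self.student_temp = student_temp
        self.teacher_temp = teacher_temp
        self.alpha = alpha

    def forward(self, student_logits, teacher_logits, labels):
        # Soft targets: KL divergence between temperature-softened distributions
        soft_loss = F.kl_div(
            F.log_softmax(student_logits / self.student_temp, dim=1),
            F.softmax(teacher_logits / self.teacher_temp, dim=1),
            reduction="batchmean") * (self.student_temp ** 2)
        # Hard targets: standard cross-entropy against the ground-truth labels
        hard_loss = F.cross_entropy(student_logits, labels)
        return self.alpha * soft_loss + (1 - self.alpha) * hard_loss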
# Load dataset
if args.dataset == 4:  # Quick Draw
    TEACHER_PATH = "./models/teacher/QuickDraw/teacher_quick_draw"
    metadata = [['airplanes.npy', 0], ['apple.npy', 1], ['bread.npy', 2],
                ['dog.npy', 3], ['guitar.npy', 4], ['lion.npy', 5],
                ['star.npy', 6], ['zebra.npy', 7], ['anvil.npy', 8],
                ['car.npy', 9]]
    qdLoader = QuickDrawLoader(metadata=metadata,
                               data_root='./data/QuickDraw',
                               max_data=10000)
    train_set = qdLoader.get_training_dataset()
    val_set = qdLoader.get_validation_dataset()
    test_set = qdLoader.get_test_dataset()

    # Teacher options for QuickDraw
    teacher_train_opts = trn.TrainingOptions()
    teacher_train_opts.optimizer_type = trn.OptimizerType.Adam
    teacher_train_opts.learning_rate = 0.01
    teacher_train_opts.learning_rate_update_by_step = False  # Update scheduler at epochs
    teacher_train_opts.learning_rate_drop_factor = 0.5
    teacher_train_opts.learning_rate_drop_type = trn.SchedulerType.StepLr
    teacher_train_opts.learning_rate_drop_step_count = 2
    teacher_train_opts.batch_size = 64
    teacher_train_opts.weight_decay = 1e-5
    teacher_train_opts.n_epochs = 8
    teacher_train_opts.use_gpu = True
    teacher_train_opts.save_model = True
    teacher_train_opts.saved_model_name = TEACHER_PATH
elif args.dataset == 3:
    TEACHER_PATH = "./models/teacher/Cifar/teacher_cifar_resnet"
                                 n_points=1000, batch_size=-1)
test_dataset = SequentialDataset(test_reg, test_labels, n_points=1)
test_dataset.batch_index = 0

net = IdentificationNet(1, 1, 3)
net.to("cuda:0")
net.reset()


def loss_fnc(pred, batch_labels):
    # Mean squared error between predictions and targets
    return torch.mean(torch.square(pred - batch_labels))


if TRAIN:
    trn_opts = trn.TrainingOptions()
    if NON_LINEAR:
        trn_opts.batch_size = 5000
        trn_opts.learning_rate = 0.001
        trn_opts.learning_rate_drop_type = trn.SchedulerType.StepLr
        trn_opts.learning_rate_drop_factor = 1
        trn_opts.learning_rate_drop_step_count = 100
        trn_opts.n_epochs = 5000
        trn_opts.l2_reg_constant = 1E-2
        trn_opts.saved_model_name = MODEL_NAME
        trn_opts.save_model = True
        trn_opts.optimizer_type = trn.OptimizerType.Adam
        trn_opts.verbose_freq = 500
    else:
        # These options work best for the linear case