import random

import numpy as np

import parameter
import predict_function


def attack_analysis(parameters, n, a_start, a_type):
    """Perturb a window of n+1 consecutive test rows by a random offset and return the prediction error."""
    N = np.shape(parameter.X_test)[0]
    a_index = random.randint(0, 1)
    a_end = a_start / 100
    # Keep a copy of the clean test set so it can be restored afterwards.
    tmp_array = np.copy(parameter.X_test)
    # random.uniform accepts its bounds in either order, so a_end < a_start is fine.
    if a_type == "additive":
        parameter.X_test[a_index:a_index + n + 1, :] += random.uniform(a_start, a_end)
    else:
        parameter.X_test[a_index:a_index + n + 1, :] -= random.uniform(a_start, a_end)
    rmse_error, average_error = predict_function.predict(
        parameter.X_test, parameter.Y_test, parameters)
    # Restore the unperturbed test data.
    parameter.X_test = np.copy(tmp_array)
    return rmse_error, average_error
def diff_attack_analysis(parameters, n, a_start, a_type):
    """Apply alternating +/- perturbations to a window of n+1 consecutive test rows and return the prediction error."""
    N = np.shape(parameter.X_test)[0]
    # a_index = random.randint(0, N + 1 - n)  # alternative: pick a random start row anywhere in the test set
    a_index = random.randint(0, 1)
    a_end = a_start + a_start / 100
    # Keep a copy of the clean test set so it can be restored afterwards.
    tmp_array = np.copy(parameter.X_test)
    # Add the offset to every other row and subtract it from the rows in between.
    parameter.X_test[a_index:a_index + n + 1:2, :] += random.uniform(a_start, a_end)
    parameter.X_test[a_index + 1:a_index + n + 1:2, :] -= random.uniform(a_start, a_end)
    rmse_error, average_error = predict_function.predict(
        parameter.X_test, parameter.Y_test, parameters)
    # Restore the unperturbed test data.
    parameter.X_test = np.copy(tmp_array)
    return rmse_error, average_error
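# Usage sketch (hypothetical): sweep a range of attack magnitudes with the two
# functions above and collect the resulting errors. The trained `parameters`
# object, the window size `n`, and the magnitude grid are assumptions made for
# illustration; only attack_analysis and diff_attack_analysis come from this code.
def sweep_attacks(trained_parameters, n=10, magnitudes=(0.1, 0.5, 1.0, 2.0)):
    results = []
    for a_start in magnitudes:
        rmse_add, _ = attack_analysis(trained_parameters, n, a_start, "additive")
        rmse_diff, _ = diff_attack_analysis(trained_parameters, n, a_start, "diff")
        results.append((a_start, rmse_add, rmse_diff))
    return results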
from predict_function import pred_args, read_json, load_checkpoint, predict
import matplotlib.pyplot as plt

# get parameters
pred_arg = pred_args()

# load a trained model
model = load_checkpoint(pred_arg.checkpoint, pred_arg.gpu)

# predict flower
predict(pred_arg.image_path, model, pred_arg.gpu, pred_arg.top_k)
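# For reference, a plausible sketch of the pred_args() helper imported above,
# assuming it parses the options this script reads (image_path, checkpoint,
# gpu, top_k). The exact flag names and defaults are assumptions, not the
# project's actual CLI.
import argparse

def pred_args_sketch():
    parser = argparse.ArgumentParser(description="Predict a flower class from an image")
    parser.add_argument("image_path", help="path to the input image")
    parser.add_argument("checkpoint", help="path to a saved model checkpoint")
    parser.add_argument("--top_k", type=int, default=5, help="report the top K most likely classes")
    parser.add_argument("--gpu", action="store_true", help="run inference on the GPU if available")
    return parser.parse_args()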
def main():
    # parse arguments
    config.parse()
    args = config.args
    for k, v in vars(args).items():
        logger.info(f"{k}:{v}")

    # set seeds
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)

    # arguments check
    device, n_gpu = args_check(args)
    os.makedirs(args.output_dir, exist_ok=True)
    forward_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)
    args.forward_batch_size = forward_batch_size

    # load config
    teachers_and_student = parse_model_config(args.model_config_json)

    # prepare GLUE task
    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    # read data
    train_dataset = None
    eval_datasets = None
    num_train_steps = None
    tokenizer_S = teachers_and_student['student']['tokenizer']
    prefix_S = teachers_and_student['student']['prefix']
    if args.do_train:
        train_dataset = load_and_cache_examples(args, args.task_name, tokenizer_S,
                                                prefix=prefix_S, evaluate=False)
    if args.do_predict:
        eval_datasets = []
        eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (args.task_name,)
        for eval_task in eval_task_names:
            eval_datasets.append(
                load_and_cache_examples(args, eval_task, tokenizer_S,
                                        prefix=prefix_S, evaluate=True))
    logger.info("Data loaded")

    # build models and load checkpoints
    if args.do_train:
        model_Ts = []
        for teacher in teachers_and_student['teachers']:
            model_type_T = teacher['model_type']
            model_config_T = teacher['config']
            checkpoint_T = teacher['checkpoint']
            _, _, model_class_T = MODEL_CLASSES[model_type_T]
            model_T = model_class_T(model_config_T, num_labels=num_labels)
            state_dict_T = torch.load(checkpoint_T, map_location='cpu')
            missing_keys, un_keys = model_T.load_state_dict(state_dict_T, strict=True)
            logger.info(f"Teacher Model {model_type_T} loaded")
            model_T.to(device)
            model_T.eval()
            model_Ts.append(model_T)

    student = teachers_and_student['student']
    model_type_S = student['model_type']
    model_config_S = student['config']
    checkpoint_S = student['checkpoint']
    _, _, model_class_S = MODEL_CLASSES[model_type_S]
    model_S = model_class_S(model_config_S, num_labels=num_labels)
    if checkpoint_S is not None:
        state_dict_S = torch.load(checkpoint_S, map_location='cpu')
        missing_keys, un_keys = model_S.load_state_dict(state_dict_S, strict=False)
        logger.info(f"missing keys:{missing_keys}")
        logger.info(f"unexpected keys:{un_keys}")
    else:
        logger.warning("Initializing student randomly")
    logger.info("Student Model loaded")
    model_S.to(device)

    if args.local_rank != -1 or n_gpu > 1:
        if args.local_rank != -1:
            raise NotImplementedError
        elif n_gpu > 1:
            if args.do_train:
                model_Ts = [torch.nn.DataParallel(model_T) for model_T in model_Ts]
            model_S = torch.nn.DataParallel(model_S)  # ,output_device=n_gpu-1)

    if args.do_train:
        # parameters
        params = list(model_S.named_parameters())
        all_trainable_params = divide_parameters(params, lr=args.learning_rate)
        logger.info("Length of all_trainable_params: %d", len(all_trainable_params))

        if args.local_rank == -1:
            train_sampler = RandomSampler(train_dataset)
        else:
            raise NotImplementedError
        train_dataloader = DataLoader(train_dataset, sampler=train_sampler,
                                      batch_size=args.forward_batch_size, drop_last=True)
        num_train_steps = int(len(train_dataloader) // args.gradient_accumulation_steps *
                              args.num_train_epochs)

        ########## DISTILLATION ###########
        train_config = TrainingConfig(
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            ckpt_frequency=args.ckpt_frequency,
            log_dir=args.output_dir,
            output_dir=args.output_dir,
            fp16=args.fp16,
            device=args.device)
        distill_config = DistillationConfig(temperature=args.temperature, kd_loss_type='ce')
        logger.info(f"{train_config}")
        logger.info(f"{distill_config}")

        adaptor_T = BertForGLUESimpleAdaptor
        adaptor_S = BertForGLUESimpleAdaptor

        distiller = MultiTeacherDistiller(train_config=train_config,
                                          distill_config=distill_config,
                                          model_T=model_Ts, model_S=model_S,
                                          adaptor_T=adaptor_T, adaptor_S=adaptor_S)

        optimizer = AdamW(all_trainable_params, lr=args.learning_rate)
        scheduler_class = get_linear_schedule_with_warmup
        scheduler_args = {
            'num_warmup_steps': int(args.warmup_proportion * num_train_steps),
            'num_training_steps': num_train_steps
        }

        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_dataset))
        logger.info("  Forward batch size = %d", forward_batch_size)
        logger.info("  Num backward steps = %d", num_train_steps)

        callback_func = partial(predict, eval_datasets=eval_datasets, args=args)
        with distiller:
            distiller.train(optimizer,
                            scheduler_class=scheduler_class,
                            scheduler_args=scheduler_args,
                            dataloader=train_dataloader,
                            num_epochs=args.num_train_epochs,
                            callback=callback_func,
                            max_grad_norm=1)

    if not args.do_train and args.do_predict:
        res = predict(model_S, eval_datasets, step=0, args=args)
        print(res)
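# The adaptors handed to MultiTeacherDistiller translate (batch, model_outputs)
# into the dictionary of fields the distiller consumes. A minimal illustrative
# sketch is given below; it assumes the model returns a tuple whose second
# element is the classification logits. This is not the project's actual
# BertForGLUESimpleAdaptor, just an example of the expected shape.
def glue_adaptor_sketch(batch, model_outputs):
    # 'logits' feeds the soft-label KD loss configured via kd_loss_type='ce'.
    return {'logits': (model_outputs[1],)}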
print('Cost at initial theta (zeros): ', cost)
print('Gradient at initial theta (zeros): ')
print(grad)

result = opt.fmin_tnc(func=costfunction, x0=initial_theta, fprime=gradient, args=(X2, Y))
theta = result[0]
cost = costfunction(theta, X2, Y)
print('Cost at theta found by fmin_tnc: ', cost)
print('Theta: ')
print(theta)

# Prediction
p = predict(theta, X2)
print('Prediction: ')
print(p)

# Accuracy
correct = [
    1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0
    for (a, b) in zip(p, Y)
]
accuracy = sum(map(int, correct)) / len(correct)
print('Train Accuracy: {0}%'.format(accuracy * 100))

# Confusion Matrix
Y_predict = predict(theta, X2_test)
conf_metrics = metrics.confusion_matrix(Y_test, Y_predict)
print('Confusion Matrix: ')
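# The script above depends on costfunction, gradient, and predict helpers defined
# elsewhere in the project. The standard logistic-regression versions are sketched
# here for reference (assuming 1-D theta and label vectors); the project's actual
# implementations may differ.
import numpy as np

def sigmoid_sketch(z):
    return 1.0 / (1.0 + np.exp(-z))

def costfunction_sketch(theta, X, y):
    # Average negative log-likelihood of the labels under the logistic model.
    h = sigmoid_sketch(X @ theta)
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

def gradient_sketch(theta, X, y):
    # Gradient of the cost with respect to theta.
    h = sigmoid_sketch(X @ theta)
    return X.T @ (h - y) / len(y)

def predict_sketch(theta, X):
    # Classify as 1 when the predicted probability is at least 0.5.
    return (sigmoid_sketch(X @ theta) >= 0.5).astype(int)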