def tuning_model(inp, extractor, pre_model, feature_model, data_dict, sites, augData=0):
    """Fine-tune a softmax classifier on ``data_dict`` and return the updated feature model.

    A new ``Dense`` softmax layer with ``len(sites)`` units is attached to
    ``extractor``, the resulting model is initialised through
    ``copy_weights(..., pre_model, compileModel=False)``, trained on the data
    from ``data_dict``, and finally ``copy_weights(feature_model, new_model)``
    is returned.

    Args:
        inp: Input tensor used as ``inputs`` of the new model.
        extractor: Tensor the new softmax layer is applied to.
        pre_model: Model handed to ``copy_weights`` as the weight source for
            the new classifier.
        feature_model: Model that receives the tuned weights at the end.
        data_dict: Data passed to ``mytools.datadict2data`` together with
            ``sites``.
        sites: Collection of classes; its length is the number of outputs.
        augData: When truthy, ``augData.data_aug`` is called on the data with
            ``newSamNum=20``.

    Returns:
        Whatever ``copy_weights(feature_model, new_model)`` returns.
    """
    samples, labels = mytools.datadict2data(data_dict, sites)
    if augData:
        # NOTE(review): the parameter itself is used as the augmentation
        # object here, so a plain truthy flag such as 1 would fail on
        # `.data_aug` -- confirm what callers actually pass.
        samples, labels = augData.data_aug(samples, labels, newSamNum=20)

    num_classes = len(sites)
    samples = samples[:, :, np.newaxis]  # add a new axis at position 2
    labels = to_categorical(labels, num_classes)

    # replace the last layer
    softmax_head = Dense(num_classes, activation='softmax')(extractor)
    tuned = Model(inputs=inp, outputs=softmax_head)
    tuned = copy_weights(tuned, pre_model, compileModel=False)

    print('Compiling...')
    tuned.compile(
        loss='categorical_crossentropy',
        optimizer='Adam',
        metrics=['accuracy'],
    )

    # tuning the model: keep the best checkpoint by training accuracy and
    # stop once it has not improved for 10 epochs
    best_model_file = os.path.join(ResDir, 'best_tune_model.h5')
    callbacks = [
        ModelCheckpoint(
            filepath=best_model_file,
            monitor='accuracy',
            verbose=1,
            save_best_only=True,
            mode='max',
        ),
        EarlyStopping(monitor='accuracy', mode='max', patience=10),
    ]
    tuned.fit(
        samples,
        labels,
        batch_size=64,
        epochs=30,
        verbose=1,
        shuffle=True,
        callbacks=callbacks,
    )

    return copy_weights(feature_model, tuned)
def kNN_train(signature_vector_dict, params):
    """Fit a brute-force ``KNeighborsClassifier`` on the signature vectors.

    Args:
        signature_vector_dict: Mapping converted to ``(X, y)`` by
            ``mytools.datadict2data``; its keys are also returned (shuffled).
        params: Mapping providing the classifier settings under the keys
            ``'k'``, ``'weights'``, ``'p'`` and ``'metric'``.

    Returns:
        A ``(classifier, site_labels)`` tuple, where ``site_labels`` is the
        list of keys of ``signature_vector_dict`` in shuffled order.
    """
    # NOTE(review): the key list is shuffled independently of the training
    # data; confirm callers do not rely on it lining up with the labels.
    shuffled_sites = [*signature_vector_dict.keys()]
    random.shuffle(shuffled_sites)

    features, targets = mytools.datadict2data(signature_vector_dict)
    print('kNN training data shape: ', features.shape)

    classifier = KNeighborsClassifier(
        n_neighbors=params['k'],
        weights=params['weights'],
        p=params['p'],
        metric=params['metric'],
        algorithm='brute',
    )
    classifier.fit(features, targets)
    return classifier, shuffled_sites
def test_open(self, opts, threshold, openDataOpt, test_times=5):
    """Run the open-world evaluation ``test_times`` times and average the metrics.

    Each round builds fresh data with ``self.formOpenData``, tunes a model
    with ``self.tuneTheModel``, predicts on the monitored and unmonitored
    test splits, and scores the predictions with
    ``utility.calculatePrecAndRecAndTPRAndFPR`` at ``threshold``.

    Args:
        opts: Options object; ``opts.nShot`` is read and ``opts`` is passed on
            to ``self.formOpenData``.
        threshold: Forwarded to the metric computation.
        openDataOpt: Forwarded to ``self.formOpenData``.
        test_times: Number of rounds to average over.

    Returns:
        ``(mean_precision, mean_recall, mean_tpr, mean_fpr)`` over all rounds.
    """
    n_shot = opts.nShot
    precisions, recalls, tprs, fprs = [], [], [], []

    for _ in range(test_times):
        # tune model phase
        signature_dict, test_dict = self.formOpenData(
            opts, n_shot, openDataOpt=openDataOpt)
        train_x, train_y = mytools.datadict2data(signature_dict)

        # number of distinct labels; used both for the log line and as the
        # class count of the tuned model
        num_classes = len(set(train_y))
        print('n_shot is: {}\tsize_of_problem is: {}'.format(
            n_shot, num_classes))

        train_x = train_x[:, :, np.newaxis]
        train_y = np_utils.to_categorical(train_y, num_classes)
        tuned_model = self.tuneTheModel(train_x, train_y, num_classes)

        # test phase
        (mon_x, mon_y, unmon_x,
         _unmon_y, max_label) = utility.splitMonAndUnmon(test_dict)
        mon_scores = tuned_model.predict(mon_x)
        unmon_scores = tuned_model.predict(unmon_x)

        precision, recall, tpr, fpr = utility.calculatePrecAndRecAndTPRAndFPR(
            mon_scores, unmon_scores, mon_y, max_label, threshold)
        precisions.append(precision)
        recalls.append(recall)
        tprs.append(tpr)
        fprs.append(fpr)

    mean_precision = mean(precisions)
    mean_recall = mean(recalls)
    mean_tpr = mean(tprs)
    mean_fpr = mean(fprs)
    print('precision = ', mean_precision, '\trecall = ', mean_recall,
          '\tTPR = ', mean_tpr, '\tFPR = ', mean_fpr)
    return mean_precision, mean_recall, mean_tpr, mean_fpr
def _load_source_data(param, args, sample_num):
    """Load the source dataset into ``param`` and one-hot encode its labels."""
    param["source_data"], param["source_label"] = data.data_loader(
        args.source, param["inp_dims"], sample_num=sample_num)
    cls_num = len(set(param["source_label"]))
    param["source_label"] = data.one_hot_encoding(param["source_label"], cls_num)


def _train_on_target(param, args, n_shot, show_shape=False):
    """Draw an n-shot target set, store it in ``param`` and train once."""
    signature_dict, test_dict, _sites = utility.getDataDict(
        args.target, n_shot=n_shot, data_dim=param['inp_dims'],
        train_pool_size=20, test_size=70)
    target_data, target_label = mytools.datadict2data(signature_dict)
    if show_shape:
        print('target data shape: ', target_data.shape)
    target_data = target_data[:, :, np.newaxis]
    target_label = data.one_hot_encoding(target_label, len(set(target_label)))
    param["target_data"], param["target_label"] = target_data, target_label
    model_path, time_last = train(param, args)
    return signature_dict, test_dict, model_path, time_last


def _test_tuned_model(param, args, model_path, n_shot, signature_dict, test_dict):
    """Evaluate the model saved at ``model_path`` once and return its accuracy."""
    test_opts = test.MyOpts(model_path, nShot=n_shot, tuning=True, aug=0,
                            exp_type=args.exp_type)
    test_opts.nShot = n_shot
    test_params = test.generate_default_params(test_opts)
    inp_shape = (param["inp_dims"], 1)
    _, acc = test.run(test_opts, signature_dict, test_dict, params=test_params,
                      emb_size=param['embsz'], inp_shape=inp_shape, test_times=1)
    return acc


def _run_rounds(param, args, n_shot, source, target, rounds, verbose):
    """Repeat train+test ``rounds`` times; return (accuracies, training times)."""
    acc_list, time_last_list = [], []
    for _ in range(rounds):
        signature_dict, test_dict, model_path, time_last = _train_on_target(
            param, args, n_shot, show_shape=verbose)
        time_last_list.append(time_last)
        if verbose:
            print('training time last: ', time_last)
        acc = _test_tuned_model(param, args, model_path, n_shot,
                                signature_dict, test_dict)
        acc_list.append(acc)
        print('acc of source {} and target {} with n_shot {} is: {:f}'.format(
            source, target, n_shot, acc))
    return acc_list, time_last_list


def run(param, args):
    """Run the experiment selected by ``args.testType``.

    Supported test types:

    * ``'nShot'``     -- for each n_shot in [1, 5, 10, 15, 20], train and test
      10 times and append mean accuracy, stdev and mean training time to a
      result file under ``ResDir``.
    * ``'aug'``       -- placeholder; only prints a message.
    * ``'trainNum'``  -- for each source sample number in
      [25, 50, 75, 100, 125], reload the source data, then train and test
      10 times with n_shot fixed to 20 and append the summary to a result file.
    * ``'trainTime'`` -- train once with n_shot 20 and print the training time.

    For every type except ``'trainNum'`` the source data is loaded up front
    with ``sample_num=25``.

    Args:
        param: Mutable dict. ``'inp_dims'`` is read (and ``'embsz'`` when a
            model is tested); ``'source_data'``, ``'source_label'``,
            ``'target_data'`` and ``'target_label'`` are overwritten.
        args: Object with ``source``, ``target``, ``testType`` and
            ``exp_type`` attributes.

    Raises:
        ValueError: If ``args.testType`` is not one of the supported types.
            This is checked before any data is loaded.
    """
    test_type = args.testType
    known_types = ('nShot', 'aug', 'trainNum', 'trainTime')
    if test_type not in known_types:
        # Fail fast with a clear message instead of a bare `raise` after the
        # (potentially slow) source data load.
        raise ValueError('unknown testType {!r}; expected one of {}'.format(
            test_type, ', '.join(known_types)))

    source = os.path.basename(args.source).split('.')[0]
    target = os.path.basename(args.target).split('.')[0]
    test_num = 10

    if test_type != 'trainNum':
        # Load source data and encode its labels into one-hot format
        _load_source_data(param, args, sample_num=25)
    else:
        print('will run train num test, so not loading training data at first')

    if test_type == 'nShot':
        print('run n_shot test...')
        n_shot_list = [1, 5, 10, 15, 20]
        outfile = os.path.join(
            ResDir, 'ADA_one_source_{}_target_{}_res.txt'.format(source, target))
        # `with` guarantees the result file is closed even if a round fails
        with open(outfile, 'a+') as f:
            print('\n\n##################### test time is: {}####################'.format(time.ctime()), file=f, flush=True)
            for n_shot in n_shot_list:
                acc_list, time_last_list = _run_rounds(
                    param, args, n_shot, source, target, test_num, verbose=True)
                resLine = 'acc of source {} and target {} with n_shot {} is: {:f}, stdev is: {:f}, time last: {:f}\n\n'.format(
                    source, target, n_shot, mean(acc_list), stdev(acc_list),
                    mean(time_last_list))
                print(resLine, file=f, flush=True)
    elif test_type == 'aug':
        print('will run aug test...')
    elif test_type == 'trainNum':
        print('will run train num test...')
        n_shot = 20
        outfile = os.path.join(
            ResDir,
            'trainNumTest_ADA_one_source_{}_target_{}_res.txt'.format(source, target))
        with open(outfile, 'a+') as f:
            print('\n\n################### test time is: {} ####################'.format(time.ctime()), file=f, flush=True)
            print('test with N shot num: {}'.format(n_shot), file=f, flush=True)
            trainNumList = [25, 50, 75, 100, 125]
            for trainNum in trainNumList:
                # load training data according to the train num
                _load_source_data(param, args, sample_num=trainNum)
                print('train data shape is: ', np.array(param['source_data']).shape)
                acc_list, time_last_list = _run_rounds(
                    param, args, n_shot, source, target, test_num, verbose=False)
                # NOTE(review): the summary line does not include trainNum, so
                # entries are only distinguishable by their order in the file.
                resLine = 'acc of source {} and target {} with n_shot {} is: {:f}, stdev is: {:f}, training time last: {:f}'.format(
                    source, target, n_shot, mean(acc_list), stdev(acc_list),
                    mean(time_last_list))
                print(resLine, file=f, flush=True)
    else:  # 'trainTime' -- the only remaining validated type
        n_shot = 20
        _, _, _model_path, time_last = _train_on_target(param, args, n_shot)
        print('training time last: ', time_last)