def run_cv_FedModel_clients(model_class: BaseModel,
                            train_data: UserDayData,
                            k: int,
                            clients_per_round_list: List[float],
                            parameter_dict: Dict[str, float]) -> Dict[str, float]:

    num_val_samples = (71 * parameter_dict['cv']) // k
    metrics_by_clients = defaultdict(list)

    for i in range(k):
        val_days = list(range(i * num_val_samples, (i + 1) * num_val_samples))
        val_fold = train_data.get_subset_for_days(val_days)

        train_days = (
            list(range(i * num_val_samples)) +
            list(range((i + 1) * num_val_samples, num_val_samples * k)))
        train_fold = train_data.get_subset_for_days(train_days)

        for clients_per_round in clients_per_round_list:
            # for cross-validation purposes, use the current clients_per_round
            # instead of the user-inputted value
            parameter_dict['fed_model_parameters'][
                'clients_per_round'] = clients_per_round

            model = model_class(parameter_config=parameter_dict)
            results = ExperimentUtils.run_single_experiment(
                model, train_fold, val_fold)

            # get the evaluation metric, based on the prediction problem
            if parameter_dict['output_layer']['loss_type'] == 'regression':
                metrics_by_clients[clients_per_round].append(results['mse'])
            elif parameter_dict['output_layer']['loss_type'] == 'classification':
                if len(parameter_dict['output_layer']
                       ['classification_thresholds']) == 1:
                    metrics_by_clients[clients_per_round].append(results['AUC'])
                elif len(parameter_dict['output_layer']
                         ['classification_thresholds']) > 1:
                    metrics_by_clients[clients_per_round].append(
                        results['accuracy'])
                else:
                    raise ValueError(
                        'If loss_type is classification, '
                        'classification_thresholds in the user-inputted .json '
                        'must have at least length 1')

            # write progress to file
            ExperimentUtils.write_to_json(
                metrics_by_clients, 'tmp_output' + '_cv_clients')

    for clients_per_round in clients_per_round_list:
        metrics_by_clients[str(clients_per_round) + '_avg'] = np.mean(
            metrics_by_clients[clients_per_round])

    return metrics_by_clients
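# Illustrative sketch of the day-based fold split in run_cv_FedModel_clients
# above (the value of parameter_dict['cv'] is an assumption, not taken from the
# source): with k = 3 and cv = 1, num_val_samples = (71 * 1) // 3 = 23, so fold
# i = 0 validates on days 0-22 and trains on days 23-68, while days 69-70 lie
# beyond num_val_samples * k and are never used in any fold.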
def main():
    start = datetime.datetime.now()

    with open(sys.argv[1]) as file:
        parameter_dict = json.load(file)

    # get the name of the model from the user-inputted json file
    # and match it to the corresponding model object
    model_registry = {
        "fed_model": FedModel,
    }
    if model_registry.get(parameter_dict.get('model_type')) is None:
        raise KeyError('model_type in .json must be "fed_model"')
    else:
        model_class = model_registry[parameter_dict['model_type']]

    k = 3
    clients_per_round_list = [
        10, 15, 20, 25, 30, 35, 40, 45, 50, 55,
        60, 65, 70, 75, 80, 85, 90, 95, 100
    ]

    train_data, test_data = ExperimentUtils.simple_train_test_split(
        parameter_dict)

    metrics_by_clients = run_cv_FedModel_clients(
        model_class, train_data, k, clients_per_round_list, parameter_dict)

    ExperimentUtils.write_to_json(
        metrics_by_clients, parameter_dict['output_path'] + '_cv_clients')

    finish = datetime.datetime.now() - start
    print('Time to finish: ' + str(finish.total_seconds()))
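# A hedged example of the config JSON this script expects. The field names are
# taken from the keys read above and in run_cv_FedModel_clients; the values and
# any structure beyond those keys are illustrative assumptions, not the
# project's actual schema:
#
# {
#     "model_type": "fed_model",
#     "cv": 1,
#     "output_path": "results/fed_clients",
#     "learn_rate": 0.01,
#     "epochs": 10,
#     "output_layer": {
#         "loss_type": "classification",
#         "classification_thresholds": [0.5]
#     },
#     "fed_model_parameters": {
#         "clients_per_round": 10,
#         "local_updates_per_round": 4,
#         "fed_stepsize": 0.1
#     }
# }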
def main():
    start = datetime.datetime.now()

    with open(sys.argv[1]) as file:
        parameter_dict = json.load(file)

    # get the name of the model from the user-inputted json file
    # and match it to the corresponding model object
    model_registry = {
        "individual_model": IndividualModel,
        "global_model": GlobalModel,
        "global_model_pers": GlobalModelPersonalized,
        "fed_model": FedModel,
        "fed_model_pers": FedModelPersonalized,
        "moving_mean_model": MovingMeanModel,
        "baseline_model": BaselineModel,
    }
    if model_registry.get(parameter_dict.get('model_type')) is None:
        raise KeyError(
            'model_type in config json must be one of: "individual_model", '
            '"global_model", "fed_model", "fed_model_pers", "global_model_pers", '
            '"moving_mean_model", "baseline_model"')
    else:
        model_class = model_registry[parameter_dict['model_type']]

    k = 3

    # 5, 10, 25, 50 % of users
    # clients_per_round_list = [20, 40, 100, 200]
    clients_per_round_list = [1, 2, 4, 8, 10, 15]

    # local_updates_per_round_list = [2, 4, 6, 8]
    # local_updates_per_round_list = [8000, 10000, 50000]
    local_updates_per_round_list = [1, 2, 4, 8, 12]

    # fed_stepsize_list = [1]
    fed_stepsize_list = [0.01, 0.1, 0.5, 0.7, 1]

    # Earlier learning-rate grids, kept for reference:
    # lrs = [0.00001, 0.00005, 0.005, 0.01, 0.03, 0.05]  # an hour on jessica's computer
    # lrs = [0.02, 0.03, 0.04, 0.06]  # 30 min on jessica's computer
    # lrs = np.linspace(0, 1, 50, endpoint=False)  # 2.55 hours for global model on (0,1,50) on jessica's computer
    # lrs = np.linspace(0, 1, 25, endpoint=False)  # 3.6 hours for individual model on (0,1,25) on jessica's computer, 2.65 hours for global model + fed model on (0,1,25)
    # lrs = np.linspace(0, 0.25, 25, endpoint=False)
    # lrs = [1e-10, 1e-08, 1e-06, 1e-05, 0.0001, 0.0002, 0.0004, 0.0006, 0.0008, 0.001, 0.002, 0.004, 0.006, 0.008, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1]
    # lrs = np.arange(0.01, 0.05, 0.01)
    # lrs = np.logspace(-5, -1, base=10, num=25)
    # lrs = [0.002, 0.004, 0.006, 0.008, 0.01]
    # lrs = np.concatenate([np.arange(0.005, 0.01, 0.001), np.arange(0.01, 0.05, 0.01)])
    # lrs = np.arange(0.01, 0.1, 0.01)
    lrs = [0.5, 0.6]

    # tune number of epochs jointly with learning rates
    # epochs = np.arange(10, 80, 20)
    # epochs = np.arange(30, 80, 20)
    # epochs = np.concatenate([np.arange(5, 25, 5), [40, 50, 60]])
    epochs = [1, 2, 5, 10, 20, 30, 40]

    user_list = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]

    # train_data, test_data = ExperimentUtils.raw_train_test_split(parameter_dict)
    train_data, test_data = ExperimentUtils.simple_train_test_split(
        parameter_dict)

    metrics_by_lr = run_cv(model_class, train_data, k, epochs,
                           clients_per_round_list,
                           local_updates_per_round_list, fed_stepsize_list,
                           lrs, parameter_dict, user_list)

    ExperimentUtils.write_to_json(
        metrics_by_lr,
        parameter_dict['output_path'] + "_(" + parameter_dict['model_type'] +
        ")" + '_cv_lr')

    finish = datetime.datetime.now() - start
    print('Time to finish: ' + str(finish.total_seconds()))
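# Illustrative cost estimate for the grids set above: run_cv trains
# len(epochs) * len(clients_per_round_list) * len(local_updates_per_round_list)
# * len(fed_stepsize_list) * len(lrs) * k models, i.e.
# 7 * 6 * 5 * 5 * 2 * 3 = 6300 training runs for the values chosen here.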
def run_cv(model_class: BaseModel,
           train_data: UserDayData,
           k: int,
           epochs: List[int],
           clients_per_round_list: List[int],
           local_updates_per_round_list: List[int],
           fed_stepsize_list: List[float],
           lrs: List[float],
           parameter_dict: Dict[str, float],
           user_list: List[int]) -> Dict[str, float]:

    num_val_samples = 50 // k

    # save metrics in dictionary
    metrics_by_lr = defaultdict(list)

    for epoch in epochs:
        metrics_by_lr['Epoch_' + str(epoch)] = defaultdict(list)
        # for cross-validation purposes,
        # use current epoch instead of user-inputted epoch
        parameter_dict['epochs'] = epoch

        for clients_per_round in clients_per_round_list:
            metrics_by_lr['Epoch_' + str(epoch)][
                'Clients_' + str(clients_per_round)] = defaultdict(list)
            parameter_dict['fed_model_parameters'][
                'clients_per_round'] = clients_per_round

            for local_update in local_updates_per_round_list:
                metrics_by_lr['Epoch_' + str(epoch)][
                    'Clients_' + str(clients_per_round)][
                    'Local_Updates_' + str(local_update)] = defaultdict(list)
                parameter_dict['fed_model_parameters'][
                    'local_updates_per_round'] = local_update

                for fed_stepsize in fed_stepsize_list:
                    metrics_by_lr['Epoch_' + str(epoch)][
                        'Clients_' + str(clients_per_round)][
                        'Local_Updates_' + str(local_update)][
                        'Fed_stepsize_' + str(fed_stepsize)] = defaultdict(list)
                    parameter_dict['fed_model_parameters'][
                        'fed_stepsize'] = fed_stepsize

                    for lr in lrs:
                        # for cross-validation purposes,
                        # use current learn_rate instead of user-inputted learn rate
                        parameter_dict['learn_rate'] = lr

                        for i in range(k):
                            val_pairs = []
                            train_pairs = []

                            for user in user_list:
                                # we need to set the validation days for each user
                                # separately, because the order of tasks differed
                                # across users and the number of measurements
                                # differed very slightly for each task for each user

                                # get the indices where the classification label
                                # changes; this only works if the first and last
                                # values are NOT the same, which in our case is true,
                                # because the labeled tasks (baseline, stress,
                                # amusement) occur sequentially and do not repeat

                                # get the classes in the validation set for each user
                                user_data = train_data.get_subset_for_users([user])
                                user_days = [x[1] for x in user_data.user_day_pairs]
                                user_val_y = user_data.get_y()

                                # 7/15/2020 choosing 1 data point from each class for training
                                # user_train_y = user_data.get_y()
                                '''
                                # index where class a, class b, class c start
                                class_b_start = np.where(np.roll(user_train_y, 1) != user_train_y)[0][1]
                                class_c_start = np.where(np.roll(user_train_y, 1) != user_train_y)[0][2]

                                class_a_idx = np.random.randint(low=0, high=class_b_start)
                                class_b_idx = np.random.randint(low=class_b_start, high=class_c_start)
                                class_c_idx = np.random.randint(low=class_c_start, high=len(user_train_y))
                                user_train_idx = [class_a_idx, class_b_idx, class_c_idx]

                                # get the days we want for training, based on our selected indices
                                mask = np.zeros(np.array(user_days).shape, dtype=bool)
                                mask[user_train_idx] = True
                                user_train_days = np.array(user_days)[mask]
                                for x in user_train_days:
                                    train_pairs.append((user, x))

                                # use the rest of the training data for validation set
                                user_val_days = np.array(user_days)[~mask]
                                for x in user_val_days:
                                    val_pairs.append((user, x))
                                '''

                                # 7/15/2020 comment out the block below if we're
                                # choosing 1 data point from each class for training

                                # index where class a, class b, class c start
                                class_b_start = np.where(
                                    np.roll(user_val_y, 1) != user_val_y)[0][1]
                                class_c_start = np.where(
                                    np.roll(user_val_y, 1) != user_val_y)[0][2]

                                # get indices of user_days that we want for the validation set
                                class_a_idx = list(
                                    range(class_b_start // 3 * i,
                                          class_b_start // 3 * (i + 1)))
                                class_b_idx = list(
                                    range(
                                        class_b_start +
                                        (class_c_start - class_b_start) // 3 * i,
                                        class_b_start +
                                        (class_c_start - class_b_start) // 3 * (i + 1)))
                                class_c_idx = list(
                                    range(
                                        class_c_start +
                                        (len(user_val_y) - class_c_start) // 3 * i,
                                        class_c_start +
                                        (len(user_val_y) - class_c_start) // 3 * (i + 1)))
                                user_val_idx = class_a_idx + class_b_idx + class_c_idx

                                # get the days we want for validation, based on our selected indices
                                mask = np.zeros(np.array(user_days).shape, dtype=bool)
                                mask[user_val_idx] = True
                                user_val_days = np.array(user_days)[mask]
                                for x in user_val_days:
                                    val_pairs.append((user, x))

                                # use the rest of the training data for the training set
                                user_train_days = np.array(user_days)[~mask]
                                for x in user_train_days:
                                    train_pairs.append((user, x))

                            # train_fold = train_data.get_subset_for_user_day_pairs(train_pairs)
                            # val_fold = train_data.get_subset_for_user_day_pairs(val_pairs)

                            # just to test, using global model's standardization
                            tmp_train_fold = train_data.get_subset_for_user_day_pairs(
                                train_pairs)
                            tmp_val_fold = train_data.get_subset_for_user_day_pairs(
                                val_pairs)
                            # end

                            # seeing if this will fix a memory leak
                            K.clear_session()
                            session_conf = tf.ConfigProto(
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
                            sess = tf.Session(
                                graph=tf.get_default_graph(), config=session_conf)
                            K.set_session(sess)

                            model = model_class(parameter_config=parameter_dict)

                            # standardization for fed models
                            scaler = StandardScaler().fit(tmp_train_fold.get_X())
                            train_fold_X_np = scaler.transform(tmp_train_fold.get_X())
                            train_fold_X = pd.DataFrame(
                                train_fold_X_np,
                                columns=tmp_train_fold.get_X().columns)
                            val_fold_X_np = scaler.transform(tmp_val_fold.get_X())
                            val_fold_X = pd.DataFrame(
                                val_fold_X_np,
                                columns=tmp_val_fold.get_X().columns)

                            train_fold = UserDayData(
                                X=train_fold_X,
                                user_day_pairs=tmp_train_fold.get_user_day_pairs(),
                                y=tmp_train_fold.get_y())
                            val_fold = UserDayData(
                                X=val_fold_X,
                                user_day_pairs=tmp_val_fold.get_user_day_pairs(),
                                y=tmp_val_fold.get_y())
                            # end

                            results = ExperimentUtils.run_single_experiment(
                                model, train_fold, val_fold)

                            # get the evaluation metric, based on the prediction problem
                            if parameter_dict['output_layer']['loss_type'] == 'regression':
                                metrics_by_lr['Epoch_' + str(epoch)][
                                    'Clients_' + str(clients_per_round)][
                                    'Local_Updates_' + str(local_update)][
                                    'Fed_stepsize_' + str(fed_stepsize)][
                                    'Learn_rate_' + str(lr)].append(results['mse'])
                            elif parameter_dict['output_layer']['loss_type'] == 'classification':
                                if len(parameter_dict['output_layer']
                                       ['classification_thresholds']) == 1:
                                    metrics_by_lr['Epoch_' + str(epoch)][
                                        'Clients_' + str(clients_per_round)][
                                        'Local_Updates_' + str(local_update)][
                                        'Fed_stepsize_' + str(fed_stepsize)][
                                        'Learn_rate_' + str(lr)].append(results['AUC'])
                                elif len(parameter_dict['output_layer']
                                         ['classification_thresholds']) > 1:
                                    metrics_by_lr['Epoch_' + str(epoch)][
                                        'Clients_' + str(clients_per_round)][
                                        'Local_Updates_' + str(local_update)][
                                        'Fed_stepsize_' + str(fed_stepsize)][
                                        'Learn_rate_' + str(lr)].append(results['accuracy'])
                                else:
                                    raise ValueError(
                                        'If loss_type is classification, '
                                        'classification_thresholds in the user-inputted '
                                        '.json must have at least length 1')

                            # write progress to file
                            # ExperimentUtils.write_to_json(metrics_by_lr, parameter_dict['output_path'] + "_(" + parameter_dict['model_type'] + ")" + 'tmp_cv_lr')
                            output_path = (parameter_dict['output_path'] + "_(" +
                                           parameter_dict['model_type'] + ")" +
                                           'tmp_cv_lr')
                            with open(output_path + ".json", "w") as f:
                                json.dump(metrics_by_lr, f, indent=4)

    for epoch in epochs:
        metrics_by_lr['Epoch_' + str(epoch) + '_avg'] = defaultdict(list)
        for clients_per_round in clients_per_round_list:
            metrics_by_lr['Epoch_' + str(epoch) + '_avg'][
                'Clients_' + str(clients_per_round) + '_avg'] = defaultdict(list)
            for local_update in local_updates_per_round_list:
                metrics_by_lr['Epoch_' + str(epoch) + '_avg'][
                    'Clients_' + str(clients_per_round) + '_avg'][
                    'Local_Updates_' + str(local_update) + '_avg'] = defaultdict(list)
                for fed_stepsize in fed_stepsize_list:
                    metrics_by_lr['Epoch_' + str(epoch) + '_avg'][
                        'Clients_' + str(clients_per_round) + '_avg'][
                        'Local_Updates_' + str(local_update) + '_avg'][
                        'Fed_stepsize_' + str(fed_stepsize) + '_avg'] = defaultdict(list)
                    for lr in lrs:
                        metrics_by_lr['Epoch_' + str(epoch) + '_avg'][
                            'Clients_' + str(clients_per_round) + '_avg'][
                            'Local_Updates_' + str(local_update) + '_avg'][
                            'Fed_stepsize_' + str(fed_stepsize) + '_avg'][
                            'Learn_rate_' + str(lr) + '_avg'] = np.mean(
                                metrics_by_lr['Epoch_' + str(epoch)][
                                    'Clients_' + str(clients_per_round)][
                                    'Local_Updates_' + str(local_update)][
                                    'Fed_stepsize_' + str(fed_stepsize)][
                                    'Learn_rate_' + str(lr)])

    return metrics_by_lr
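# Illustrative sketch of the per-user, class-stratified split used above
# (the label layout is an assumption for the example, not real data): if a
# user's labels are 20 class-a rows, then 20 class-b rows, then 20 class-c
# rows, class_b_start = 20 and class_c_start = 40, and fold i = 1 takes
# indices 6-11, 26-31, and 46-51 for validation; every remaining index for
# that user goes into the training pairs.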
def run_cv(model_class: BaseModel,
           train_data: UserDayData,
           k: int,
           epochs: List[float],
           lrs: List[float],
           parameter_dict: Dict[str, float],
           user_list: List[int]) -> Dict[str, float]:

    num_val_samples = 50 // k

    # save metrics in dictionary
    metrics_by_lr = defaultdict(list)

    for epoch in epochs:
        metrics_by_lr[str(epoch)] = defaultdict(list)
        # for cross-validation purposes,
        # use current epoch instead of user-inputted epoch
        parameter_dict['epochs'] = epoch

        for lr in lrs:
            # use current learn_rate instead of user-inputted learn rate
            parameter_dict['learn_rate'] = lr

            for i in range(k):
                val_pairs = []
                train_pairs = []

                for user in user_list:
                    # we need to set the validation days for each user separately,
                    # because the order of tasks differed across users and the
                    # number of measurements differed very slightly for each task
                    # for each user

                    # get the indices where the classification label changes;
                    # this only works if the first and last values are NOT the same,
                    # which in our case is true, because the labeled tasks
                    # (baseline, stress, amusement) occur sequentially and do not repeat

                    # get the classes in the validation set for each user
                    user_data = train_data.get_subset_for_users([user])
                    user_days = [x[1] for x in user_data.user_day_pairs]
                    user_val_y = user_data.get_y()

                    # 7/15/2020 choosing 1 data point from each class for training
                    # user_train_y = user_data.get_y()
                    '''
                    # index where class a, class b, class c start
                    class_b_start = np.where(np.roll(user_train_y, 1) != user_train_y)[0][1]
                    class_c_start = np.where(np.roll(user_train_y, 1) != user_train_y)[0][2]

                    class_a_idx = np.random.randint(low=0, high=class_b_start)
                    class_b_idx = np.random.randint(low=class_b_start, high=class_c_start)
                    class_c_idx = np.random.randint(low=class_c_start, high=len(user_train_y))
                    user_train_idx = [class_a_idx, class_b_idx, class_c_idx]

                    # get the days we want for training, based on our selected indices
                    mask = np.zeros(np.array(user_days).shape, dtype=bool)
                    mask[user_train_idx] = True
                    user_train_days = np.array(user_days)[mask]
                    for x in user_train_days:
                        train_pairs.append((user, x))

                    # use the rest of the training data for validation set
                    user_val_days = np.array(user_days)[~mask]
                    for x in user_val_days:
                        val_pairs.append((user, x))
                    '''

                    # 7/15/2020 comment out the block below if we're choosing
                    # 1 data point from each class for training

                    # index where class a, class b, class c start
                    class_b_start = np.where(
                        np.roll(user_val_y, 1) != user_val_y)[0][1]
                    class_c_start = np.where(
                        np.roll(user_val_y, 1) != user_val_y)[0][2]

                    # get indices of user_days that we want for the validation set
                    class_a_idx = list(
                        range(class_b_start // 3 * i,
                              class_b_start // 3 * (i + 1)))
                    class_b_idx = list(
                        range(
                            class_b_start + (class_c_start - class_b_start) // 3 * i,
                            class_b_start + (class_c_start - class_b_start) // 3 * (i + 1)))
                    class_c_idx = list(
                        range(
                            class_c_start + (len(user_val_y) - class_c_start) // 3 * i,
                            class_c_start + (len(user_val_y) - class_c_start) // 3 * (i + 1)))
                    user_val_idx = class_a_idx + class_b_idx + class_c_idx

                    # get the days we want for validation, based on our selected indices
                    mask = np.zeros(np.array(user_days).shape, dtype=bool)
                    mask[user_val_idx] = True
                    user_val_days = np.array(user_days)[mask]
                    for x in user_val_days:
                        val_pairs.append((user, x))

                    # use the rest of the training data for the training set
                    user_train_days = np.array(user_days)[~mask]
                    for x in user_train_days:
                        train_pairs.append((user, x))

                train_fold = train_data.get_subset_for_user_day_pairs(train_pairs)
                val_fold = train_data.get_subset_for_user_day_pairs(val_pairs)

                model = model_class(parameter_config=parameter_dict)
                results = ExperimentUtils.run_single_experiment(
                    model, train_fold, val_fold)

                # get the evaluation metric, based on the prediction problem
                if parameter_dict['output_layer']['loss_type'] == 'regression':
                    metrics_by_lr[str(epoch)][lr].append(results['mse'])
                elif parameter_dict['output_layer']['loss_type'] == 'classification':
                    if len(parameter_dict['output_layer']
                           ['classification_thresholds']) == 1:
                        metrics_by_lr[str(epoch)][lr].append(results['AUC'])
                    elif len(parameter_dict['output_layer']
                             ['classification_thresholds']) > 1:
                        metrics_by_lr[str(epoch)][lr].append(results['accuracy'])
                    else:
                        raise ValueError(
                            'If loss_type is classification, '
                            'classification_thresholds in the user-inputted .json '
                            'must have at least length 1')

                # write progress to file
                # ExperimentUtils.write_to_json(metrics_by_lr, parameter_dict['output_path'] + "_(" + parameter_dict['model_type'] + ")" + 'tmp_cv_lr')
                output_path = (parameter_dict['output_path'] + "_(" +
                               parameter_dict['model_type'] + ")" + 'tmp_cv_lr')
                with open(output_path + ".json", "w") as f:
                    json.dump(metrics_by_lr, f, indent=4)

    for epoch in epochs:
        metrics_by_lr[str(epoch) + '_avg'] = defaultdict(list)
        for lr in lrs:
            metrics_by_lr[str(epoch) + '_avg'][str(lr) + '_avg'] = np.mean(
                metrics_by_lr[str(epoch)][lr])

    return metrics_by_lr
def main():
    start = datetime.datetime.now()

    with open(sys.argv[1]) as file:
        parameter_dict = json.load(file)

    parameter_dict["seed"] = int(sys.argv[3])
    parameter_dict["output_path"] = sys.argv[4] + sys.argv[5]

    train_data, test_data = ExperimentUtils.simple_train_test_split(
        parameter_dict)

    # get the name of the model from the user-inputted json file
    # and match it to the corresponding model object
    model_class = ExperimentUtils.model_from_config(parameter_dict['model_type'])
    model = model_class(parameter_config=parameter_dict)

    results = ExperimentUtils.run_single_experiment(
        model, train_data, test_data, parameter_dict['test_callback'])

    if parameter_dict['model_type'] != 'baseline' and parameter_dict[
            'model_type'] != 'moving_mean_model':
        results['lr'] = parameter_dict['learn_rate']

    ind_results = model.individual_evaluate(test_data)

    if parameter_dict["plot_auc"]:
        ExperimentUtils.plot_auc(
            results["FPR"], results["TPR"], results["AUC"],
            str(parameter_dict["auc_output_path"] + "_(" +
                parameter_dict['model_type'] + ")"))

    try:
        del results["FPR"]
        del results["TPR"]
    except KeyError:
        pass

    ExperimentUtils.write_to_json(
        ind_results,
        str(parameter_dict["output_path"] + "_by_user" + "_(" +
            parameter_dict['model_type'] + ")"))
    ExperimentUtils.write_to_csv(
        results,
        str(parameter_dict["output_path"] + "_(" +
            parameter_dict['model_type'] + ")"))
    ExperimentUtils.write_to_json(
        results,
        str(parameter_dict["output_path"] + "_(" +
            parameter_dict['model_type'] + ")"))

    finish = datetime.datetime.now() - start
    print('Time to finish: ' + str(finish.total_seconds()))
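# Hedged usage sketch for this entry point (the script name and the filler
# value for the unused argument are assumptions): it reads the config from
# sys.argv[1], the random seed from sys.argv[3], and builds the output path
# from sys.argv[4] + sys.argv[5]; sys.argv[2] is not read in this function.
#
#   python run_single_experiment.py config.json placeholder 42 results/ run_01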
def run_cv(model_class: BaseModel,
           train_data: UserDayData,
           k: int,
           epochs: List[int],
           clients_per_round_list: List[int],
           local_updates_per_round_list: List[int],
           fed_stepsize_list: List[float],
           lrs: List[float],
           parameter_dict: Dict[str, float]) -> Dict[str, float]:

    num_val_samples = 50 // k

    # save metrics in dictionary
    metrics_by_lr = defaultdict(list)

    for epoch in epochs:
        metrics_by_lr['Epoch_' + str(epoch)] = defaultdict(list)
        # for cross-validation purposes,
        # use current epoch instead of user-inputted epoch
        parameter_dict['epochs'] = epoch

        for clients_per_round in clients_per_round_list:
            metrics_by_lr['Epoch_' + str(epoch)][
                'Clients_' + str(clients_per_round)] = defaultdict(list)
            parameter_dict['fed_model_parameters'][
                'clients_per_round'] = clients_per_round

            for local_update in local_updates_per_round_list:
                metrics_by_lr['Epoch_' + str(epoch)][
                    'Clients_' + str(clients_per_round)][
                    'Local_Updates_' + str(local_update)] = defaultdict(list)
                parameter_dict['fed_model_parameters'][
                    'local_updates_per_round'] = local_update

                for fed_stepsize in fed_stepsize_list:
                    metrics_by_lr['Epoch_' + str(epoch)][
                        'Clients_' + str(clients_per_round)][
                        'Local_Updates_' + str(local_update)][
                        'Fed_stepsize_' + str(fed_stepsize)] = defaultdict(list)
                    parameter_dict['fed_model_parameters'][
                        'fed_stepsize'] = fed_stepsize

                    for lr in lrs:
                        # for cross-validation purposes,
                        # use current learn_rate instead of user-inputted learn rate
                        parameter_dict['learn_rate'] = lr

                        for i in range(k):
                            val_days = list(
                                range(i * num_val_samples, (i + 1) * num_val_samples))
                            val_fold = train_data.get_subset_for_days(val_days)

                            train_days = (
                                list(range(i * num_val_samples)) +
                                list(range((i + 1) * num_val_samples,
                                           num_val_samples * k)))
                            train_fold = train_data.get_subset_for_days(train_days)

                            model = model_class(parameter_config=parameter_dict)
                            results = ExperimentUtils.run_single_experiment(
                                model, train_fold, val_fold)

                            # get the evaluation metric, based on the prediction problem
                            if parameter_dict['output_layer']['loss_type'] == 'regression':
                                metrics_by_lr['Epoch_' + str(epoch)][
                                    'Clients_' + str(clients_per_round)][
                                    'Local_Updates_' + str(local_update)][
                                    'Fed_stepsize_' + str(fed_stepsize)][
                                    'Learn_rate_' + str(lr)].append(results['mse'])
                            elif parameter_dict['output_layer']['loss_type'] == 'classification':
                                if len(parameter_dict['output_layer']
                                       ['classification_thresholds']) == 1:
                                    metrics_by_lr['Epoch_' + str(epoch)][
                                        'Clients_' + str(clients_per_round)][
                                        'Local_Updates_' + str(local_update)][
                                        'Fed_stepsize_' + str(fed_stepsize)][
                                        'Learn_rate_' + str(lr)].append(results['AUC'])
                                elif len(parameter_dict['output_layer']
                                         ['classification_thresholds']) > 1:
                                    metrics_by_lr['Epoch_' + str(epoch)][
                                        'Clients_' + str(clients_per_round)][
                                        'Local_Updates_' + str(local_update)][
                                        'Fed_stepsize_' + str(fed_stepsize)][
                                        'Learn_rate_' + str(lr)].append(results['accuracy'])
                                else:
                                    raise ValueError(
                                        'If loss_type is classification, '
                                        'classification_thresholds in the user-inputted '
                                        '.json must have at least length 1')

                            # write progress to file
                            # ExperimentUtils.write_to_json(metrics_by_lr, parameter_dict['output_path'] + 'tmp_cv_lr')
                            output_path = parameter_dict['output_path'] + 'tmp_cv_lr'
                            with open(output_path + ".json", "w") as f:
                                json.dump(metrics_by_lr, f, indent=4)

    for epoch in epochs:
        metrics_by_lr['Epoch_' + str(epoch) + '_avg'] = defaultdict(list)
        for clients_per_round in clients_per_round_list:
            metrics_by_lr['Epoch_' + str(epoch) + '_avg'][
                'Clients_' + str(clients_per_round) + '_avg'] = defaultdict(list)
            for local_update in local_updates_per_round_list:
                metrics_by_lr['Epoch_' + str(epoch) + '_avg'][
                    'Clients_' + str(clients_per_round) + '_avg'][
                    'Local_Updates_' + str(local_update) + '_avg'] = defaultdict(list)
                for fed_stepsize in fed_stepsize_list:
                    metrics_by_lr['Epoch_' + str(epoch) + '_avg'][
                        'Clients_' + str(clients_per_round) + '_avg'][
                        'Local_Updates_' + str(local_update) + '_avg'][
                        'Fed_stepsize_' + str(fed_stepsize) + '_avg'] = defaultdict(list)
                    for lr in lrs:
                        metrics_by_lr['Epoch_' + str(epoch) + '_avg'][
                            'Clients_' + str(clients_per_round) + '_avg'][
                            'Local_Updates_' + str(local_update) + '_avg'][
                            'Fed_stepsize_' + str(fed_stepsize) + '_avg'][
                            'Learn_rate_' + str(lr) + '_avg'] = np.mean(
                                metrics_by_lr['Epoch_' + str(epoch)][
                                    'Clients_' + str(clients_per_round)][
                                    'Local_Updates_' + str(local_update)][
                                    'Fed_stepsize_' + str(fed_stepsize)][
                                    'Learn_rate_' + str(lr)])

    return metrics_by_lr
def main():
    start = datetime.datetime.now()

    with open(sys.argv[1]) as file:
        parameter_dict = json.load(file)

    # get the name of the model from the user-inputted json file
    # and match it to the corresponding model object
    model_registry = {
        'individual_model': IndividualModel,
        'global_model': GlobalModel,
        'fed_model': FedModel,
    }
    if model_registry.get(parameter_dict.get('model_type')) is None:
        raise KeyError(
            "model_type in .json must be one of: 'individual_model', "
            "'global_model', 'fed_model'")
    else:
        model_class = model_registry[parameter_dict['model_type']]

    k = 3

    # Earlier learning-rate grids, kept for reference:
    # lrs = [0.00001, 0.00005, 0.005, 0.01, 0.03, 0.05]  # an hour on jessica's computer
    # lrs = [0.02, 0.03, 0.04, 0.06]  # 30 min on jessica's computer
    # lrs = np.linspace(0, 1, 50, endpoint=False)  # 2.55 hours for global model on (0,1,50) on jessica's computer
    # lrs = np.linspace(0, 1, 25, endpoint=False)  # 3.6 hours for individual model on (0,1,25) on jessica's computer, 2.65 hours for global model + fed model on (0,1,25)
    # lrs = np.linspace(0, 0.25, 25, endpoint=False)
    # lrs = np.arange(0.05, 0.3, 0.01)
    # lrs = [1e-10, 1e-08, 1e-06, 1e-05, 0.0001, 0.0002, 0.0004, 0.0006, 0.0008, 0.001, 0.002, 0.004, 0.006, 0.008, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1]
    # lrs = np.arange(0.01, 0.05, 0.01)
    # lrs = np.logspace(-5, -1, base=10, num=25)
    # lrs = [1e-10, 1e-08, 1e-06, 1e-05]
    # lrs = np.concatenate([np.arange(0.005, 0.01, 0.001), np.arange(0.01, 0.05, 0.01)])

    # import pdb
    # pdb.set_trace()

    # an earlier version parsed a bracketed learning-rate list from sys.argv[2]:
    # n = len(sys.argv[2])
    # a = sys.argv[2][1:n - 1]
    # a = a.split(',')
    # lrs = [float(i) for i in a]
    lrs = [float(sys.argv[3])]

    # tune number of epochs jointly with learning rates
    # epochs = np.arange(10, 80, 20)
    # epochs = np.concatenate([np.arange(5, 25, 5), [30]])

    # parse the epoch list from a bracketed string in sys.argv[4], e.g. "[5, 10, 20]"
    # n = len(sys.argv[3])
    # a = sys.argv[3][1:n - 1]
    n = len(sys.argv[4])
    a = sys.argv[4][1:n - 1]
    a = a.split(', ')
    epochs = [int(i) for i in a]
    # epochs = [int(sys.argv[4])]

    output_path = sys.argv[5] + sys.argv[6]

    train_data, test_data = ExperimentUtils.simple_train_test_split(
        parameter_dict)

    metrics_by_lr = run_cv(model_class, train_data, k, epochs, lrs,
                           parameter_dict)

    # output path is now the job array ID
    # ExperimentUtils.write_to_json(metrics_by_lr, parameter_dict['output_path'] + '_cv_lr')
    ExperimentUtils.write_to_json(metrics_by_lr, output_path + '_cv_lr')

    finish = datetime.datetime.now() - start
    print('Time to finish: ' + str(finish.total_seconds()))
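# Hedged usage sketch for this job-array entry point (the script name and the
# filler value for the unused argument are assumptions): it reads the config
# from sys.argv[1], a single learning rate from sys.argv[3], a bracketed epoch
# list (split on ", ") from sys.argv[4], and builds the output path from
# sys.argv[5] + sys.argv[6]; sys.argv[2] is unused here, since the earlier,
# commented-out version parsed the learning-rate list from it.
#
#   python run_cv_job_array.py config.json unused 0.01 "[5, 10, 20]" results/ job_7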