# Example #1 (score: 0)
def main():
    """Run a single train/test experiment described by a config json.

    Command-line arguments:
        sys.argv[1] -- path to the json parameter file
        sys.argv[3] -- random seed (parsed as int)
        sys.argv[4] + sys.argv[5] -- concatenated to form the output path

    Writes per-user results (json), aggregate results (csv and json), and
    optionally an AUC plot; prints the total wall-clock runtime.
    """
    start = datetime.datetime.now()

    with open(sys.argv[1]) as file:
        parameter_dict = json.load(file)

    parameter_dict["seed"] = int(sys.argv[3])
    parameter_dict["output_path"] = sys.argv[4] + sys.argv[5]

    train_data, test_data = ExperimentUtils.simple_train_test_split(
        parameter_dict)

    # get the name of the model from the user-inputted json file
    # and match it to the corresponding model object
    model_class = ExperimentUtils.model_from_config(
        parameter_dict['model_type'])
    model = model_class(parameter_config=parameter_dict)

    results = ExperimentUtils.run_single_experiment(
        model, train_data, test_data, parameter_dict['test_callback'])

    # Baseline-style models have no learning rate to record.
    if parameter_dict['model_type'] not in ('baseline', 'moving_mean_model'):
        results['lr'] = parameter_dict['learn_rate']

    ind_results = model.individual_evaluate(test_data)

    if parameter_dict["plot_auc"]:
        ExperimentUtils.plot_auc(
            results["FPR"], results["TPR"], results["AUC"],
            parameter_dict["auc_output_path"] + "_(" +
            parameter_dict['model_type'] + ")")

    # ROC curve arrays are bulky and not wanted in the summary outputs;
    # pop() with a default replaces the try/del/except-KeyError dance.
    results.pop("FPR", None)
    results.pop("TPR", None)

    # Suffix shared by every per-run output file, e.g. "_(fed_model)".
    suffix = "_(" + parameter_dict['model_type'] + ")"
    ExperimentUtils.write_to_json(
        ind_results, parameter_dict["output_path"] + "_by_user" + suffix)
    ExperimentUtils.write_to_csv(
        results, parameter_dict["output_path"] + suffix)
    ExperimentUtils.write_to_json(
        results, parameter_dict["output_path"] + suffix)

    finish = datetime.datetime.now() - start
    print('Time to finish: ' + str(finish.total_seconds()))
def main():
    """Cross-validate FedModel over a grid of clients-per-round values.

    Loads the experiment configuration json named by sys.argv[1], runs
    k-fold cross-validation via run_cv_FedModel_clients, and writes the
    resulting metrics to '<output_path>_cv_clients' as json.
    """
    start = datetime.datetime.now()

    with open(sys.argv[1]) as file:
        parameter_dict = json.load(file)

    # Only the federated model is supported by this sweep; map the
    # config's model name onto its class.
    model_registry = {
        "fed_model": FedModel,
    }

    requested = parameter_dict.get('model_type')
    if requested not in model_registry:
        raise KeyError('model_type in .json must be "fed_model"')
    model_class = model_registry[requested]

    # Number of cross-validation folds.
    k = 3

    # Grid of how many clients participate in each federated round.
    clients_per_round_list = list(range(10, 101, 5))

    train_data, test_data = ExperimentUtils.simple_train_test_split(
        parameter_dict)

    metrics_by_clients = run_cv_FedModel_clients(
        model_class, train_data, k, clients_per_round_list, parameter_dict)

    ExperimentUtils.write_to_json(
        metrics_by_clients, parameter_dict['output_path'] + '_cv_clients')

    finish = datetime.datetime.now() - start
    print('Time to finish: ' + str(finish.total_seconds()))
# Example #3 (score: 0)
def main():
    """Cross-validate hyperparameters for the model named in the config json.

    Reads the parameter json from sys.argv[1], resolves the configured
    model class, grid-searches over learning rates, epochs, and federated
    settings via run_cv, and writes the metrics to
    '<output_path>_(<model_type>)_cv_lr' as json.
    """
    start = datetime.datetime.now()

    with open(sys.argv[1]) as file:
        parameter_dict = json.load(file)

    # get the name of the model from the user-inputted json file
    # and match it to the corresponding model object
    model_registry = {
        "individual_model": IndividualModel,
        "global_model": GlobalModel,
        "global_model_pers": GlobalModelPersonalized,
        "fed_model": FedModel,
        "fed_model_pers": FedModelPersonalized,
        "moving_mean_model": MovingMeanModel,
        "baseline_model": BaselineModel,
    }

    if model_registry.get(parameter_dict.get('model_type')) is None:
        raise KeyError(
            'model_type in config json must be one of: "individual_model",'
            '"global_model", "fed_model", "fed_model_pers", "global_model_pers", "moving_mean_model", "baseline_model"'
        )
    model_class = model_registry[parameter_dict['model_type']]

    # Number of cross-validation folds.
    k = 3

    # Hyperparameter grids for the search.  (Earlier candidate grids were
    # removed as dead code; consult version control history if needed.)
    clients_per_round_list = [1, 2, 4, 8, 10, 15]
    local_updates_per_round_list = [1, 2, 4, 8, 12]
    fed_stepsize_list = [0.01, 0.1, 0.5, 0.7, 1]
    lrs = [0.5, 0.6]

    # tune number of epochs jointly with learning rates
    epochs = [1, 2, 5, 10, 20, 30, 40]

    # Users whose data participates in the cross-validation.
    user_list = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]

    train_data, test_data = ExperimentUtils.simple_train_test_split(
        parameter_dict)

    metrics_by_lr = run_cv(model_class, train_data, k, epochs,
                           clients_per_round_list,
                           local_updates_per_round_list, fed_stepsize_list,
                           lrs, parameter_dict, user_list)

    ExperimentUtils.write_to_json(
        metrics_by_lr, parameter_dict['output_path'] + "_(" +
        parameter_dict['model_type'] + ")" + '_cv_lr')

    finish = datetime.datetime.now() - start
    print('Time to finish: ' + str(finish.total_seconds()))
def main():
    """Cross-validate learning rate / epoch settings supplied on the CLI.

    Command-line arguments:
        sys.argv[1] -- path to the json parameter file
        sys.argv[3] -- single learning rate to evaluate (float)
        sys.argv[4] -- bracketed list of epoch counts, e.g. "[10, 20, 30]"
        sys.argv[5] + sys.argv[6] -- concatenated to form the output path
                                     (typically the job array ID)

    Writes the cross-validation metrics to '<output_path>_cv_lr' as json.
    """
    start = datetime.datetime.now()

    with open(sys.argv[1]) as file:
        parameter_dict = json.load(file)

    # get the name of the model from the user-inputted json file
    # and match it to the corresponding model object
    model_registry = {
        'individual_model': IndividualModel,
        'global_model': GlobalModel,
        'fed_model': FedModel,
    }

    if model_registry.get(parameter_dict.get('model_type')) is None:
        raise KeyError(
            "model_type in .json must be one of: 'individual_model', 'global_model', 'fed_model'"
        )
    model_class = model_registry[parameter_dict['model_type']]

    # Number of cross-validation folds.
    k = 3

    # A single learning rate per job; the sweep is driven by the job array.
    lrs = [float(sys.argv[3])]

    # sys.argv[4] is a bracketed list such as "[10, 20, 30]": strip the
    # brackets and split on commas.  Splitting on ',' alone (int() ignores
    # surrounding whitespace) accepts both "[1,2]" and "[1, 2]", whereas
    # the previous split(', ') broke when the space was absent.
    epochs = [int(token) for token in sys.argv[4][1:-1].split(',')]

    output_path = sys.argv[5] + sys.argv[6]

    train_data, test_data = ExperimentUtils.simple_train_test_split(
        parameter_dict)

    metrics_by_lr = run_cv(model_class, train_data, k, epochs, lrs,
                           parameter_dict)

    # output path is now the job array ID
    ExperimentUtils.write_to_json(metrics_by_lr, output_path + '_cv_lr')

    finish = datetime.datetime.now() - start
    print('Time to finish: ' + str(finish.total_seconds()))