Exemplo n.º 1
0
def read_input(input_dir: str):
    """
    Read the configured input file from 'input_dir' into a DataFrame.

    The file name is taken from the redis key 'input_filename' and the
    missing-value strategy from the redis key 'missing_data'. Files ending
    in ".csv" are parsed comma-separated, files ending in ".tsv"
    tab-separated. Missing values are then filled with the column mean
    ("mean"), the column median ("median") or the affected rows are
    dropped ("drop"); any other strategy leaves the data untouched.

    :param input_dir: The input directory containing the file.
    :return: The loaded DataFrame, or None if the file has an unsupported
             extension or could not be read.
    """
    filename = redis_get('input_filename')
    missing_data = redis_get('missing_data')
    try:
        current_app.logger.info('[API] Parsing data of ' + input_dir)
        current_app.logger.info('[API] ' + filename)
        if filename.endswith(".csv"):
            sep = ','
        elif filename.endswith(".tsv"):
            sep = '\t'
        else:
            # Without this guard the code below would operate on None and
            # only fail indirectly with an AttributeError.
            current_app.logger.info('[API] unsupported input file type: ' +
                                    filename)
            return None
        data = pd.read_csv(input_dir + '/' + filename, sep=sep)

        if missing_data == "mean":
            data.fillna(data.mean(), inplace=True)
        elif missing_data == "median":
            data.fillna(data.median(), inplace=True)
        elif missing_data == "drop":
            data.dropna(inplace=True)

        current_app.logger.info('[API] ' + str(data))

        return data

    except Exception:
        # logger.exception records the message together with the traceback.
        # The previous call passed the exception as a %-format argument to a
        # message without a placeholder, which breaks record formatting.
        current_app.logger.exception('[API] could not read files')
        return None
Exemplo n.º 2
0
def root():
    """
    Decide which page content is shown to the user for the current step.

    Every known step logs a '[WEB] ...' message and returns a short status
    text; the 'finished' step renders 'start_client.html' with the scores
    and predictions stored in redis.

    :return: HTML content
    """
    step = get_step()
    if step == 'start':
        current_app.logger.info('[WEB] Initializing')
        return 'Initializing'
    elif step == 'setup':
        current_app.logger.info('[WEB] Setup')
        return 'Setup'
    elif step == 'local_calculation':
        current_app.logger.info('[WEB] Perform local boosting')
        return 'Perform local boosting...'
    elif step == 'waiting':
        if redis_get('is_coordinator'):
            current_app.logger.info('[WEB] Waiting for client model...')
            return 'Waiting fo client model...'
        else:
            current_app.logger.info('[WEB] Send local results to coordinator')
            return 'Send local results to coordinator'
    elif step == 'global_calculation':
        current_app.logger.info('[WEB] Combine models')
        return 'Combine models...'
    elif step == 'broadcast_results':
        # The role flag is stored under 'is_coordinator' (the key used by the
        # 'waiting' branch above); the former lookup of 'coordinator' used a
        # different key than the rest of this function.
        if redis_get('is_coordinator'):
            current_app.logger.info(
                '[WEB] Broadcasting global model to other clients')
            return 'Broadcasting global model to other clients...'
        else:
            current_app.logger.info(
                '[WEB] Receiving global model from coordinator')
            return 'Receiving global model from coordinator...'
    elif step == 'test_results':
        current_app.logger.info('[WEB] Write Results')
        return 'Test results....'
    elif step == 'finalize':
        current_app.logger.info('[WEB] Finalize')
        return 'Finalize...'
    elif step == 'finished':
        current_app.logger.info('[WEB] Finished')
        return render_template('start_client.html',
                               score1=redis_get('score_single'),
                               score2=redis_get('score_combined'),
                               pred=redis_get('predictions'))
    else:
        # Unknown step name.
        return 'Something went wrong.'
Exemplo n.º 3
0
def get_connections(source, destination, departure_date):
    """
    Look up the connections for one journey through redis_get.

    The lookup key is built by journey_key(); redis_get additionally
    receives the value 60 * 60, the fetch_connections callable and the
    journey parameters as a kwargs dict.
    NOTE(review): presumably 60 * 60 is a one-hour expiry and
    fetch_connections is invoked with the kwargs on a miss - confirm
    against redis_get.

    :param source: journey origin
    :param destination: journey destination
    :param departure_date: date of departure
    :return: whatever redis_get returns for the journey key
    """
    cache_key = journey_key(source, destination, departure_date)
    fetch_kwargs = dict(
        source=source,
        destination=destination,
        departure_date=departure_date,
    )
    return redis_get(cache_key, 60 * 60, fetch_connections, fetch_kwargs)
Exemplo n.º 4
0
def calculate_average():
    """
    Score the combined model on the local test set.

    Decodes the model stored under the redis key 'global_model' and the
    test set stored under 'test_set' (index 0: features, index 1: labels),
    predicts the test features and scores the predictions with the metric
    named by the redis key 'metric':

    * contains "acc"            -> accuracy_score
    * contains "matth"          -> matthews_corrcoef
    * contains "roc" or "auc"   -> roc_auc_score
    * anything else             -> accuracy_score

    The predictions are stored under 'predictions' and the score under
    'score_combined'.

    :return: the score of the combined model on the local test set
    """
    global_model = jsonpickle.decode(redis_get('global_model'))
    client_id = redis_get('id')
    test_set = jsonpickle.decode(redis_get("test_set"))
    x_test = test_set[0]
    y_test = test_set[1]

    sum_pred = global_model.predict(x_test)

    # Read the metric once so every branch decides on the same value.
    metric = redis_get("metric")
    if "acc" in metric:
        score = accuracy_score(y_test, sum_pred)
    elif "matth" in metric:
        score = matthews_corrcoef(y_test, sum_pred)
    elif "roc" in metric or "auc" in metric:
        score = roc_auc_score(y_test, sum_pred)
    else:
        # Unknown metric names fall back to accuracy.
        score = accuracy_score(y_test, sum_pred)

    current_app.logger.info(
        f'[API] Combined AdaBoost classifier model score on local test data for {client_id}: {score}, Predictions: {sum_pred}'
    )
    redis_set('predictions', sum_pred)
    redis_set('score_combined', score)

    return score
Exemplo n.º 5
0
def has_client_model_arrived():
    """
    Check whether the models of all clients have arrived.

    Compares the number of entries stored under the redis key
    'global_data' with the value of 'nr_clients'; when they match, the
    step is set to 'global_calculation'.
    :return: None
    """
    current_app.logger.info(
        '[API] Coordinator checks if the models of all clients have arrived')
    received_models = redis_get('global_data')
    expected_clients = redis_get('nr_clients')
    current_app.logger.info(
        f'[API] {len(received_models)!s}/{expected_clients!s}'
        f' clients have sent their models.')

    if len(received_models) != expected_clients:
        current_app.logger.info(
            '[API] The model of at least one client is still missing')
        return

    current_app.logger.info('[API] The models of all clients have arrived')
    set_step('global_calculation')
Exemplo n.º 6
0
def have_clients_finished():
    """
    Check whether all clients have finished.

    Compares the number of entries stored under the redis key 'finished'
    with the value of 'nr_clients'.
    :return: True if all clients have finished, False otherwise
    """
    current_app.logger.info(
        '[API] Coordinator checks if all clients have finished')
    finished_flags = redis_get('finished')
    expected_clients = redis_get('nr_clients')
    current_app.logger.info(
        f'[API] {len(finished_flags)!s}/{expected_clients!s}'
        f' clients have finished already.')

    if len(finished_flags) != expected_clients:
        current_app.logger.info(
            '[API] At least one client did not finish yet-')
        return False

    current_app.logger.info('[API] All clients have finished.')
    return True
def get_location_id(city_name, cities):
    """
    Return the "id" entry of the city looked up for 'city_name'.

    The city is obtained through redis_get using the key from city_key(),
    the value 60 * 60, the get_city_by_name callable and the lookup
    arguments as a kwargs dict.
    NOTE(review): presumably 60 * 60 is a one-hour expiry and
    get_city_by_name is invoked with the kwargs on a miss - confirm
    against redis_get.

    :param city_name: name of the city to look up
    :param cities: passed through to the lookup as "cities"
    :return: city["id"], or None when the lookup yields a falsy value
    """
    lookup_args = {"city_name": city_name, "cities": cities}
    city = redis_get(city_key(city_name), 60 * 60, get_city_by_name,
                     lookup_args)
    return city["id"] if city else None
Exemplo n.º 8
0
def calculate_global_model():
    """
    Merge the estimators of all collected models into one global model.

    The first entry of the redis list 'global_data' is decoded and used as
    the base model; the estimators_ of every further decoded entry are
    concatenated onto its estimators_. The result is encoded with
    jsonpickle and stored under the redis key 'global_model'.
    :return: None
    """
    current_app.logger.info('[API] Combine all models')
    encoded_models = redis_get('global_data')

    # NOTE(review): jsonpickle.decode rebuilds arbitrary Python objects;
    # confirm that every entry of 'global_data' comes from a trusted party.
    combined = jsonpickle.decode(encoded_models[0])
    for encoded in encoded_models[1:]:
        client_model = jsonpickle.decode(encoded)
        combined.estimators_ = combined.estimators_ + client_model.estimators_

    redis_set('global_model', jsonpickle.encode(combined))
Exemplo n.º 9
0
def calculate_local_model():
    """
    Perform local boosting on the data stored under the redis key 'files'.

    The data is split into a training and a test part, the test part is
    stored (jsonpickle-encoded) under 'test_set', a model is built with
    build_model() and its score is stored under 'score_single'.
    :return: the jsonpickle-encoded model, or None if no data is available
    """
    current_app.logger.info('[API] Perform local boosting')
    local_files = redis_get('files')

    # Guard clause: nothing to train on.
    if local_files is None:
        current_app.logger.info('[API] No data available')
        return None

    client_id = redis_get('id')
    dataset = set_X_y(local_files, label_col=redis_get("label_col"))

    # Stratified train/test split; the test fraction and the seed come
    # from the 'test_size' and 'random_state' settings.
    split = train_test_split(dataset.get("data"),
                             dataset.get("target"),
                             test_size=redis_get("test_size"),
                             stratify=dataset.get("target"),
                             random_state=redis_get("random_state"))
    x_train, x_test, y_train, y_test = split

    # Keep the held-out part for the later evaluation of the global model.
    redis_set("test_set", jsonpickle.encode([x_test, y_test]))

    score, model = build_model(x_train, x_test, y_train, y_test)
    encoded_model = jsonpickle.encode(model)

    metric = redis_get("metric")
    current_app.logger.info(
        f'[API] Local AdaBoost classifier model {metric} {client_id}: {score} '
    )
    redis_set('score_single', score)
    return encoded_model
Exemplo n.º 10
0
def read_config():
    """
    Load the 'fc_boosting' section of INPUT_DIR/config.yml into redis.

    Each setting is copied from its config section ('files' or
    'parameters') to a redis key; afterwards the parameter values are read
    back from redis and logged.
    :return: None
    """
    # (redis key, config section, config option), in the order they are set.
    settings = (
        ('input_filename', 'files', 'input'),
        ('label_col', 'parameters', 'label_col'),
        ('output_filename', 'files', 'output'),
        ('missing_data', 'files', 'missing_data'),
        ('test_size', 'parameters', 'test_size'),
        ('n_estimators', 'parameters', 'n_estimators'),
        ('learning_rate', 'parameters', 'learning_rate'),
        ('random_state', 'parameters', 'random_state'),
        ('metric', 'parameters', 'metric'),
    )
    # Redis keys whose stored values are logged after loading.
    logged_keys = ('label_col', 'test_size', 'n_estimators', 'learning_rate',
                   'random_state', 'metric')

    with open(INPUT_DIR + '/config.yml') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)['fc_boosting']

        for redis_key, section, option in settings:
            redis_set(redis_key, config[section][option])

        for redis_key in logged_keys:
            current_app.logger.info('[API] ' + redis_key + ': ' +
                                    str(redis_get(redis_key)))
Exemplo n.º 11
0
def data():
    """
    Exchange models and finish flags between coordinator and clients.

    POST request to /data:
      * coordinator, step != 'finalize': append the posted 'data' (a local
        model) to the redis list 'global_data'
      * coordinator, step == 'finalize': append the posted 'finished' flag
        to the redis list 'finished'
      * client: store the posted 'global_model' and move to 'test_results'
    GET request to /data (always resets 'available' to False):
      * client, step != 'finalize': return the local model
      * client, step == 'finalize': move to 'finished', return the flag
      * coordinator: return the global model

    :return: GET request: JSON with key 'data', 'finished' or
             'global_model'
             POST request (and any other method): JSON True
    """
    if request.method == 'POST':
        current_app.logger.info('[API] /data POST request')

        if not redis_get('is_coordinator'):
            # Get models from coordinator (as client)
            redis_set('global_model', request.get_json(True)['global_model'])
            set_step('test_results')
            return jsonify(True)

        # Get data from clients (as coordinator)
        if get_step() != 'finalize':
            # Get local models of the clients
            global_data = redis_get('global_data')
            global_data.append(request.get_json(True)['data'])
            redis_set('global_data', global_data)
            return jsonify(True)

        # Get finished flags of the clients. The body is parsed once;
        # previously it was parsed a second time with the first result
        # thrown away.
        payload = request.get_json(True)
        finish = redis_get('finished')
        finish.append(payload['finished'])
        redis_set('finished', finish)
        return jsonify(True)

    if request.method == 'GET':
        current_app.logger.info('[API] /data GET request')

        if redis_get('is_coordinator'):
            # broadcast data to clients (as coordinator)
            redis_set('available', False)
            global_model = redis_get('global_model')
            return jsonify({'global_model': global_model})

        # send data to coordinator (as client)
        if get_step() != 'finalize':
            # Send local model to the coordinator
            current_app.logger.info('[API] send model to coordinator')
            redis_set('available', False)
            local_data = redis_get('local_data')
            return jsonify({'data': local_data})

        # Send finish flag to the coordinator
        current_app.logger.info('[API] send finish flag to coordinator')
        redis_set('available', False)
        set_step('finished')
        return jsonify({'finished': True})

    current_app.logger.info(
        '[API] Wrong request type, only GET and POST allowed')
    return jsonify(True)
Exemplo n.º 12
0
def write_results(output_dir: str, model=None, score=None, plot=None):
    """
    Write the available results to the output directory.

    :param output_dir: String of the output directory. Usually /mnt/output
    :param model: If not None, pickled to
                  '<output_dir>/global_boosting_classifier.sav'.
    :param score: If not None, '<output_dir>/eval_on_local_testset.csv' is
                  written. The CSV values are read from the redis keys
                  'score_single' and 'score_combined' (column name from
                  'metric'); the argument only switches the output on.
    :param plot: If not None, saved with plot.savefig to
                 '<output_dir>/plot.png'.
    :return: None
    """
    current_app.logger.info("[API] Write results to output folder")

    if model is not None:
        # save the model to disk; the context manager closes the file
        # handle even if pickling fails
        filename = output_dir + '/' + 'global_boosting_classifier.sav'
        with open(filename, 'wb') as model_file:
            pickle.dump(model, model_file)
    if score is not None:
        filename = output_dir + '/eval_on_local_testset.csv'
        score_df = pd.DataFrame(
            index=["local_model", "global_model"],
            columns=[redis_get("metric")],
            data=[redis_get("score_single"),
                  redis_get("score_combined")])
        score_df.to_csv(filename)
    if plot is not None:
        # Written next to the other results instead of the current working
        # directory.
        filename = output_dir + '/plot.png'
        plot.savefig(filename)
Exemplo n.º 13
0
def params():
    """
    :return: current parameter values as a HTML page
    """
    is_coordinator = redis_get('is_coordinator')
    step = redis_get('step')
    local_data = redis_get('local_data')
    global_data = redis_get('global_data')
    data = redis_get('data')
    available = redis_get('available')
    return f"""
Exemplo n.º 14
0
def build_model(x_train, x_test, y_train, y_test):
    """
    Train an AdaBoost classifier and score it on the given test data.

    The hyper-parameters are read from the redis keys 'learning_rate',
    'random_state' and 'n_estimators'. The score is computed with the
    metric named by the redis key 'metric':

    * contains "acc"            -> accuracy_score
    * contains "matth"          -> matthews_corrcoef
    * contains "roc" or "auc"   -> roc_auc_score
    * anything else             -> accuracy_score

    :param x_train: training features
    :param x_test: test features
    :param y_train: training labels
    :param y_test: test labels
    :return: tuple (score, fitted model)
    """
    lr = float(redis_get("learning_rate"))
    # NOTE(review): as before, any falsy stored value (including the seed 0)
    # is turned into None - confirm whether 0 should be kept as a seed.
    random_state = redis_get("random_state") or None

    # Create adaboost object
    obj = AdaBoostClassifier(n_estimators=redis_get("n_estimators"),
                             learning_rate=lr,
                             random_state=random_state)
    # Train Adaboost
    model = obj.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # Read the metric once. The former "auc" check called the misspelled
    # name 'redic_get' and raised a NameError whenever it was evaluated.
    metric = redis_get("metric")
    if "acc" in metric:
        score = accuracy_score(y_test, y_pred)
    elif "matth" in metric:
        score = matthews_corrcoef(y_test, y_pred)
    elif "roc" in metric or "auc" in metric:
        score = roc_auc_score(y_test, y_pred)
    else:
        # Unknown metric names fall back to accuracy.
        score = accuracy_score(y_test, y_pred)
    return score, model
Exemplo n.º 15
0
def status():
    """
    GET request to /status, if True is returned a GET data request will be send

    Besides reporting the state, every call runs the work belonging to the
    current step (see the branches below) and, where applicable, moves on
    to the next step via set_step().

    Note that the 'available' value in the response is read once at the
    beginning of the call, so changes made to 'available' during this call
    are not reflected in this response.

    :return: JSON with key 'available' and value True or False and 'finished' value True or False
    """
    # Snapshot taken before any step logic runs; used for the final response.
    available = redis_get('available')
    current_app.logger.info('[API] /status GET request ' + str(available) +
                            ' - [STEP]: ' + str(get_step()))

    if get_step() == 'start':
        # Nothing to compute here; only logging.
        current_app.logger.info('[STEP] start')
        current_app.logger.info('[API] Federated Boosting App')

    elif get_step() == 'local_calculation':
        current_app.logger.info('[STEP] local_calculation')
        # jsonpickle-encoded local model, or None if no data is available
        model = calculate_local_model()

        if redis_get('is_coordinator'):
            # if this is the coordinator, directly add the local model to the global_data list
            global_data = redis_get('global_data')
            global_data.append(model)
            redis_set('global_data', global_data)
            current_app.logger.info('[STEP] : waiting_for_clients')
        else:
            # if this is a client, set the local model to local_data and set available to true
            redis_set('local_data', model)
            current_app.logger.info('[STEP] waiting_for_coordinator')
            redis_set('available', True)

        set_step('waiting')

    elif get_step() == 'waiting':
        current_app.logger.info('[STEP] waiting')
        if redis_get('is_coordinator'):
            # check if all clients have sent their models already
            # (moves on to 'global_calculation' once they have)
            has_client_model_arrived()
        else:
            # the clients wait for the coordinator to finish
            current_app.logger.info(
                '[API] Client waiting for coordinator to finish')

    elif get_step() == 'global_calculation':
        # as soon as all data has arrived the global calculation starts
        current_app.logger.info('[STEP] global_calculation')
        calculate_global_model()
        set_step("broadcast_results")

    elif get_step() == 'broadcast_results':
        # the result is broadcasted to the clients
        current_app.logger.info('[STEP] broadcast_results')
        current_app.logger.info('[API] Share global results with clients')
        redis_set('available', True)
        set_step('test_results')

    elif get_step() == 'test_results':
        current_app.logger.info('[STEP] test_results')
        # score of the global model on the local test set
        score = calculate_average()
        redis_set("score_of_global_model_on_local_test_set", score)
        current_app.logger.info('[API] Finalize client')
        set_step('write_results')
    elif get_step() == 'write_results':
        current_app.logger.info('[STEP] Write Results')
        write_results(
            output_dir=OUTPUT_DIR,
            model=redis_get("global_model"),
            score=redis_get("score_of_global_model_on_local_test_set"),
            plot=None)
        if redis_get('is_coordinator'):
            # The coordinator is already finished now
            redis_set('finished', [True])
        # Coordinator and clients continue with the finalize step
        set_step("finalize")

    elif get_step() == 'finalize':
        current_app.logger.info('[STEP] finalize')
        current_app.logger.info("[API] Finalize")
        if redis_get('is_coordinator'):
            # The coordinator waits until all clients have finished
            if have_clients_finished():
                current_app.logger.info('[API] Finalize coordinator.')
                set_step('finished')
            else:
                current_app.logger.info(
                    '[API] Not all clients have finished yet.')
        else:
            # The clients set available true to signal the coordinator that they have written the results.
            redis_set('available', True)

    elif get_step() == 'finished':
        # The computation is done: report available False and finished True
        current_app.logger.info('[STEP] finished')
        return jsonify({'available': False, 'finished': True})

    # Any step other than 'finished' (including unknown ones) reports the
    # 'available' snapshot taken at the top and finished False.
    return jsonify({
        'available': True if available else False,
        'finished': False
    })