def local_1(args, computation_phase):
    """Fit the local regression and return the results as a JSON string.

    Args:
        args (dict): must provide args['input'] with the keys
            'covariates', 'dependents' and 'lambda'.
        computation_phase: echoed unchanged into the output section.

    Returns:
        str: JSON text (sorted keys, 4-space indent) with
            'output' - local beta vector, r^2, t-values, p-values, mean and
                       count of the dependents, and the computation phase
            'cache'  - the covariates, dependents and lambda that were read
    """
    site_input = args['input']
    covariates = site_input['covariates']
    dependents = site_input['dependents']
    lam = site_input['lambda']

    # Statistics helpers get the covariates with a leading column of ones;
    # the regression helper gets the covariates as supplied.
    design = np.insert(covariates, 0, 1, axis=1)
    coefficients = reg.one_shot_regression(covariates, dependents, lam)

    fit_r_2 = reg.r_square(design, dependents, coefficients)
    t_values = reg.t_value(design, dependents, coefficients)
    n_obs = len(dependents)
    # NOTE(review): the arguments are passed as (dof, t-values) here -
    # confirm this matches the signature of reg.t_to_p in use.
    p_values = reg.t_to_p(n_obs - len(coefficients), t_values)

    output_section = {
        'beta_vector_local': coefficients.tolist(),
        'r_2_local': fit_r_2,
        'ts_local': t_values.tolist(),
        'ps_local': p_values,
        'mean_y_local': np.mean(dependents),
        'count_local': n_obs,
        'computation_phase': computation_phase,
    }
    cache_section = {
        'covariates': covariates,
        'dependents': dependents,
        'lambda': lam,
    }

    return json.dumps(
        {'output': output_section, 'cache': cache_section},
        sort_keys=True,
        indent=4,
        separators=(',', ': '))
def remote_2(args, computation_phase):
    """Compute the global model fit statistics from the per-site sums.

    Args:
        args (dict):
            args['input'] - one entry per site (a sequence of dicts, or a
                mapping whose values are those dicts), each providing
                'SSE_local', 'SST_local' and 'varX_matrix_local'.
            args['cache'] - provides 'avg_beta_vector', 'dof_global' and the
                local values ('beta_vector_local', 'r_2_local', 'ts_local',
                'ps_local', 'dof_local') that are echoed in the output.
        computation_phase: not used by this step; kept so the call
            signature stays unchanged.

    Returns:
        str: JSON text whose 'output' section holds r_2_global, ts_global,
        ps_global, the echoed cache values and 'complete': True.
    """
    cache_list = args['cache']
    input_list = args['input']
    avg_beta_vector = cache_list['avg_beta_vector']
    dof_global = cache_list['dof_global']

    # Accept either a sequence of per-site dicts or a site-keyed mapping.
    if isinstance(input_list, dict):
        site_stats = list(input_list.values())
    else:
        site_stats = list(input_list)

    SSE_global = sum(site['SSE_local'] for site in site_stats)
    SST_global = sum(site['SST_local'] for site in site_stats)
    # Convert to arrays before adding so the matrices are summed
    # element-wise; "+" on nested lists would concatenate their rows and
    # hand a non-square matrix to the inversion below.
    varX_matrix_global = sum(
        np.array(site['varX_matrix_local']) for site in site_stats)

    r_2_global = 1 - (SSE_global / SST_global)
    # float() guards against integer (floor) division of the dof.
    MSE = SSE_global / float(dof_global)
    var_beta_global = MSE * np.linalg.inv(varX_matrix_global).diagonal()
    se_beta_global = np.sqrt(var_beta_global)
    ts_global = np.asarray(avg_beta_vector) / se_beta_global
    # NOTE(review): the arguments are passed as (dof, t-values) here -
    # confirm this matches the signature of reg.t_to_p in use.
    ps_global = reg.t_to_p(dof_global, ts_global)

    computation_output = json.dumps(
        {
            'output': {
                'avg_beta_vector': cache_list['avg_beta_vector'],
                'beta_vector_local': cache_list['beta_vector_local'],
                'r_2_global': r_2_global,
                # ndarray is not JSON serializable; emit a plain list.
                'ts_global': ts_global.tolist(),
                'ps_global': ps_global,
                'r_2_local': cache_list['r_2_local'],
                'ts_local': cache_list['ts_local'],
                'ps_local': cache_list['ps_local'],
                'dof_global': cache_list['dof_global'],
                'dof_local': cache_list['dof_local'],
                'complete': True
            }
        },
        sort_keys=True,
        indent=4,
        separators=(',', ': '))
    return computation_output
Пример #3
0
def remote_4(args):
    """
    Computes the global model fit statistics for every regression (one per
    label in the cached "y_labels") and bundles them with the cached local
    statistics of each site.

    Args:
        args (dictionary): {"input": {
                                <site>: {
                                    "SSE_local": ,
                                    "SST_local": ,
                                    "varX_matrix_local":
                                    },
                                },
                            "cache": {
                                "y_labels": ,
                                "all_local_stats_dicts": ,
                                "avg_beta_vector": ,
                                "dof_global":
                                }
                            }

    Returns:
        computation_output (json) : {"output": {
                                        "regressions": [{
                                            "ROI": ,
                                            "global_stats": {
                                                "avg_beta_vector": ,
                                                "r2_global": ,
                                                "ts_global": ,
                                                "ps_global": ,
                                                "dof_global":
                                                },
                                            "local_stats": {
                                                "Site_0": ,
                                                }
                                            }]
                                        },
                                    "success":
                                    }
    Comments:
        Generate the global fit statistics
            r^2 : goodness of fit/coefficient of determination
                    Given as 1 - (SSE/SST)
                    where   SSE = Sum Squared of Errors
                            SST = Total Sum of Squares
            t   : t-statistic is the coefficient divided by its standard error.
                    Given as beta/std.err(beta)
            p   : p-value obtained from reg.t_to_p for the t-statistics and
                  the degrees of freedom of the regression

    """
    input_list = args["input"]
    cache_list = args["cache"]
    y_labels = cache_list["y_labels"]
    all_local_stats_dicts = cache_list["all_local_stats_dicts"]
    avg_beta_vector = cache_list["avg_beta_vector"]
    dof_global = cache_list["dof_global"]

    # Aggregate the per-site sums element-wise across all sites.
    site_stats = [input_list[site] for site in input_list]
    SSE_global = sum(np.array(stats["SSE_local"]) for stats in site_stats)
    SST_global = sum(np.array(stats["SST_local"]) for stats in site_stats)
    varX_matrix_global = sum(
        np.array(stats["varX_matrix_local"]) for stats in site_stats)

    r_squared_global = 1 - (SSE_global / SST_global)
    MSE = SSE_global / np.array(dof_global)

    # The aggregated matrix is the same for every regression, so invert it
    # once instead of once per loop iteration.
    inv_varX_diagonal = sp.linalg.inv(varX_matrix_global).diagonal()

    ts_global = []
    ps_global = []
    for i, mse in enumerate(MSE):
        se_beta_global = np.sqrt(mse * inv_varX_diagonal)
        ts = avg_beta_vector[i] / se_beta_global
        ts_global.append(ts)
        ps_global.append(reg.t_to_p(ts, dof_global[i]))

    # The cached local stats hold one sequence per site; transpose them so
    # each regression gets a {"Site_<n>": stats} mapping.
    sites = ['Site_' + str(i) for i in range(len(all_local_stats_dicts))]
    local_stats = [
        dict(zip(sites, stats)) for stats in zip(*all_local_stats_dicts)
    ]

    dict_list = []
    for index, label in enumerate(y_labels):
        global_stats = {
            "avg_beta_vector": avg_beta_vector[index],
            "r2_global": r_squared_global[index],
            "ts_global": ts_global[index].tolist(),
            "ps_global": ps_global[index],
            "dof_global": dof_global[index],
        }
        dict_list.append({
            "ROI": label,
            "global_stats": global_stats,
            "local_stats": local_stats[index],
        })

    computation_output = {
        "output": {
            "regressions": dict_list
        },
        "success": True
    }

    return json.dumps(computation_output)
Пример #4
0
def remote_3(args):
    """
    Computes the global model fit statistics r_2_global, ts_global and
    ps_global from the sums reported by every site.

    Args:
        args (dictionary): {"input": {
                                <site>: {
                                    "SSE_local": ,
                                    "SST_local": ,
                                    "varX_matrix_local":
                                    },
                                },
                            "cache": {
                                "avg_beta_vector": ,
                                "dof_global":
                                }
                            }

    Returns:
        computation_output (json) : {"output": {
                                        "avg_beta_vector": ,
                                        "r_2_global": ,
                                        "ts_global": ,
                                        "ps_global": ,
                                        "dof_global":
                                        },
                                    "success":
                                    }
    Comments:
        r^2 : 1 - (SSE/SST), with both sums taken over all sites
        t   : averaged coefficient divided by its standard error, where the
              standard errors come from the diagonal of
              MSE * inverse(summed varX matrices)
        p   : obtained from reg.t_to_p for the t-statistics and the global
              degrees of freedom

    """
    site_inputs = args["input"]
    cache = args["cache"]
    beta_avg = cache["avg_beta_vector"]
    dof = cache["dof_global"]

    # Collect the per-site records once, then total each statistic.
    per_site = [site_inputs[site] for site in site_inputs]
    sse_total = np.sum([stats["SSE_local"] for stats in per_site])
    sst_total = np.sum([stats["SST_local"] for stats in per_site])
    varx_total = sum(
        np.array(stats["varX_matrix_local"]) for stats in per_site)

    fit_r_2 = 1 - (sse_total / sst_total)

    mse = sse_total / dof
    beta_std_err = np.sqrt((mse * sp.linalg.inv(varx_total)).diagonal())
    t_stats = beta_avg / beta_std_err
    p_values = reg.t_to_p(t_stats, dof)

    output_section = {
        "avg_beta_vector": cache["avg_beta_vector"],
        "r_2_global": fit_r_2,
        "ts_global": t_stats.tolist(),
        "ps_global": p_values,
        "dof_global": cache["dof_global"],
    }

    return json.dumps({"output": output_section, "success": True})
def local_1(args, computation_phase):
    """Computes the local beta vector and the local fit statistics

    Args:
        args (dictionary) : {"input":
                                {"covariates": ,
                                 "dependents": ,
                                 "lambda":
                                },
                            "cache": {}
                            }

        computation_phase : value copied unchanged into the
                            'computation_phase' field of the output

    Returns:
        computation_output(json) : {
                                        'output': {
                                            'beta_vector_local': ,
                                            'r_2_local': ,
                                            'ts_local': ,
                                            'ps_local': ,
                                            'mean_y_local': ,
                                            'count_local': ,
                                            'computation_phase':
                                        },
                                        'cache': {
                                            'covariates': ,
                                            'dependents': ,
                                            'lambda':
                                        }
                                    }

    Comments:
        Step 1 : Fit the local beta_vector with reg.one_shot_regression
        Step 2 : Derive r^2, t-statistics and p-values through the reg
                 helpers, using the covariates with a leading column of ones
                 and dof = number of dependents - number of coefficients
        Step 3 : Report the mean and the number of the dependents
    """
    site_input = args['input']
    covariates = site_input['covariates']
    dependents = site_input['dependents']
    lam = site_input['lambda']

    # The fit-statistic helpers receive the covariates with a leading
    # column of ones; the regression helper gets them as supplied.
    design = np.insert(covariates, 0, 1, axis=1)
    coefficients = reg.one_shot_regression(covariates, dependents, lam)

    fit_r_2 = reg.r_square(design, dependents, coefficients)
    t_values = reg.t_value(design, dependents, coefficients)
    n_obs = len(dependents)
    p_values = reg.t_to_p(t_values, n_obs - len(coefficients))

    output_section = {
        'beta_vector_local': coefficients.tolist(),
        'r_2_local': fit_r_2,
        'ts_local': t_values.tolist(),
        'ps_local': p_values,
        'mean_y_local': np.mean(dependents),
        'count_local': n_obs,
        'computation_phase': computation_phase,
    }
    cache_section = {
        'covariates': covariates,
        'dependents': dependents,
        'lambda': lam,
    }

    return json.dumps(
        {'output': output_section, 'cache': cache_section},
        sort_keys=True,
        indent=4,
        separators=(',', ': '))
Пример #6
0
def remote_2(args):
    """
    Calculates the global model fit statistics r_2_global, ts_global and
    ps_global from the sums reported by every site.

    Args:
        args (dictionary): {
                                'input': {
                                    <site>: {
                                        'SSE_local': ,
                                        'SST_local': ,
                                        'varX_matrix_local':
                                    }
                                },
                                'cache': {
                                    'avg_beta_vector': ,
                                    'dof_global':
                                }
                            }

    Returns:
        computation_output (json) : {
                                        'output': {
                                            'avg_beta_vector': ,
                                            'r_2_global': ,
                                            'ts_global': ,
                                            'ps_global': ,
                                            'dof_global':
                                        },
                                        'success':
                                    }
    """
    cache = args['cache']
    site_inputs = args['input']
    beta_avg = cache['avg_beta_vector']
    dof = cache['dof_global']

    # Collect the per-site records once, then total each statistic.
    per_site = [site_inputs[site] for site in site_inputs]
    sse_total = np.sum([stats['SSE_local'] for stats in per_site])
    sst_total = np.sum([stats['SST_local'] for stats in per_site])
    varx_total = sum(
        np.array(stats['varX_matrix_local']) for stats in per_site)

    fit_r_2 = 1 - (sse_total / sst_total)

    # Standard errors come from the diagonal of MSE * inverse(summed varX).
    mse = sse_total / dof
    beta_std_err = np.sqrt((mse * np.linalg.inv(varx_total)).diagonal())
    t_stats = beta_avg / beta_std_err
    p_values = reg.t_to_p(t_stats, dof)

    output_section = {
        'avg_beta_vector': cache['avg_beta_vector'],
        'r_2_global': fit_r_2,
        'ts_global': t_stats.tolist(),
        'ps_global': p_values,
        'dof_global': cache['dof_global'],
    }

    return json.dumps({'output': output_section, 'success': True})